diff options
| author | Mate Kukri <km@mkukri.xyz> | 2024-05-20 20:48:44 +0100 | 
|---|---|---|
| committer | Mate Kukri <km@mkukri.xyz> | 2024-05-20 20:48:44 +0100 | 
| commit | abe6717c332238e46fd56195b645cdb024f2db90 (patch) | |
| tree | 7bc5189d6d2ceb49c29b0b4f8f89f49242695f3d /config | |
| parent | 1e54db29897786ff49b8ff228ed0b2fbdd8b70dc (diff) | |
Add NVMe support to GRUB2 payload
Tested on OptiPlex 3050 (via injecting grub2.elf into WIP coreboot
port).
Diffstat (limited to 'config')
| -rw-r--r-- | config/grub/config/grub.cfg | 45 | ||||
| -rw-r--r-- | config/grub/modules.list | 1 | ||||
| -rw-r--r-- | config/grub/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch | 1074 | 
3 files changed, 1113 insertions, 7 deletions
| diff --git a/config/grub/config/grub.cfg b/config/grub/config/grub.cfg index 3bbd2ecc..6b36d90b 100644 --- a/config/grub/config/grub.cfg +++ b/config/grub/config/grub.cfg @@ -76,10 +76,20 @@ function search_grub {  	echo -n "Attempting to load grub.cfg from '${1}' devices"  	for i in 0 1 2 3 4 5 6 7 8 9 10 11; do  		for part in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do -			try_user_config "(${1}${i},${part})" +			if [ "${1}" != "nvme" ]; then +				try_user_config "(${1}${i},${part})" +			else +				# TODO: do we care about other namesapces +				try_user_config "(nvme${i}n1,${part})" +			fi  		done -		# raw devices e.g. (ahci0) instead of (ahci0,1) -		try_user_config "(${1}${i})" +		if [ "${1}" != "nvme" ]; then +			# raw devices e.g. (ahci0) instead of (ahci0,1) +			try_user_config "(${1}${i})" +		else +			# TODO: do we care about other namesapces +			try_user_config "(nvme${i}n1)" +		fi  	done  	echo # Insert newline  } @@ -102,10 +112,20 @@ function search_isolinux {  	echo "\nAttempting to parse iso/sys/extlinux config from '${1}' devices"  	for i in 0 1 2 3 4 5 6 7 8 9 10 11; do  		for part in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do -			try_isolinux_config "(${1}${i},${part})" +			if [ "${1}" != "nvme" ]; then +				try_isolinux_config "(${1}${i},${part})" +			else +				# TODO: see above +				try_isolinux_config "(nvme${i}n1,${part})" +			fi  		done -		# raw devices e.g. (usb0) instead of (usb0,1) -		try_isolinux_config "(${1}${i})" +		if [ "${1}" != "nvme" ]; then +			# raw devices e.g. (usb0) instead of (usb0,1) +			try_isolinux_config "(${1}${i})" +		else +			# TODO: do we care about other namesapces +			try_isolinux_config "(nvme${i}n1)" +		fi  	done  	echo # Insert newline  } @@ -125,6 +145,9 @@ menuentry 'Load Operating System (incl. fully encrypted disks)  [o]' --hotkey='o  	if [ "${grub_scan_disk}" != "ahci" ]; then  		search_bootcfg ata  	fi +	if [ "${grub_scan_disk}" != "nvme" ]; then +		search_bootcfg nvme +	fi  	# grub device enumeration is very slow, so checks are hardcoded @@ -143,6 +166,7 @@ menuentry 'Load Operating System (incl. fully encrypted disks)  [o]' --hotkey='o  	unset ahcidev  	unset atadev +	unset nvmedev  	for i in 11 10 9 8 7 6 5 4 3 2 1 0; do  		for part in 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1; do  			if [ "${grub_scan_disk}" != "ata" ]; then @@ -151,6 +175,10 @@ menuentry 'Load Operating System (incl. fully encrypted disks)  [o]' --hotkey='o  			if [ "${grub_scan_disk}" != "ahci" ]; then  				atadev="(ata${i},${part}) ${atadev}"  			fi +			if [ "${grub_scan_disk}" != "nvme" ]; then +				# TODO: do we care about other namesapces +				nvmedev="(nvme${i}n1,${part}) ${nvmedev}" +			fi  		done  	done @@ -185,6 +213,9 @@ menuentry 'Search for GRUB/SYSLINUX/EXTLINUX/ISOLINUX on AHCI  [a]' --hotkey='a'  menuentry 'Search for GRUB/SYSLINUX/EXTLINUX/ISOLINUX on ATA/IDE  [d]' --hotkey='d' {  	search_bootcfg ata  } +menuentry 'Search for GRUB/SYSLINUX/EXTLINUX/ISOLINUX on NVMe  [e]' --hotkey='e' { +	search_bootcfg nvme +}  if [ -f (cbfsdisk)/grubtest.cfg ]; then  menuentry 'Load test configuration (grubtest.cfg) inside of CBFS  [t]' --hotkey='t' {  	set root='(cbfsdisk)' @@ -246,4 +277,4 @@ submenu 'Other  [z]' --hotkey='z' {  	menuentry 'Disable spkmodem  [z]' --hotkey='z' {  		terminal_output --remove spkmodem  	} -}
\ No newline at end of file +} diff --git a/config/grub/modules.list b/config/grub/modules.list index f3768adb..5943a9ae 100644 --- a/config/grub/modules.list +++ b/config/grub/modules.list @@ -105,6 +105,7 @@ multiboot2 \  nativedisk \  normal \  ntfs \ +nvme \  ohci \  part_bsd \  part_dfly \ diff --git a/config/grub/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch b/config/grub/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch new file mode 100644 index 00000000..b00611a0 --- /dev/null +++ b/config/grub/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch @@ -0,0 +1,1074 @@ +From ef9d61ae9ed490301adf9ee064a9fc1190069d81 Mon Sep 17 00:00:00 2001 +From: Mate Kukri <km@mkukri.xyz> +Date: Mon, 20 May 2024 11:43:35 +0100 +Subject: Add native NVMe driver based on SeaBIOS + +Tested to successfully boot Debian on QEMU and OptiPlex 3050. + +Signed-off-by: Mate Kukri <km@mkukri.xyz> +--- + Makefile.am                     |   2 +- + grub-core/Makefile.core.def     |   6 + + grub-core/commands/nativedisk.c |   1 + + grub-core/disk/nvme-int.h       | 208 +++++++++ + grub-core/disk/nvme.c           | 781 ++++++++++++++++++++++++++++++++ + include/grub/disk.h             |   1 + + 6 files changed, 998 insertions(+), 1 deletion(-) + create mode 100644 grub-core/disk/nvme-int.h + create mode 100644 grub-core/disk/nvme.c + +diff --git a/Makefile.am b/Makefile.am +index 65016f856..7bc0866ba 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -434,7 +434,7 @@ if COND_i386_coreboot + FS_PAYLOAD_MODULES ?= $(shell cat grub-core/fs.lst) + default_payload.elf: grub-mkstandalone grub-mkimage FORCE + 	test -f $@ && rm $@ || true +-	pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata xhci ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg ++	pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata nvme xhci ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg + endif +  + endif +diff --git a/grub-core/Makefile.core.def b/grub-core/Makefile.core.def +index ea782666d..0f893369a 100644 +--- a/grub-core/Makefile.core.def ++++ b/grub-core/Makefile.core.def +@@ -2602,3 +2602,9 @@ module = { +   efi = commands/bli.c; +   enable = efi; + }; ++ ++module = { ++  name = nvme; ++  common = disk/nvme.c; ++  enable = pci; ++}; +diff --git a/grub-core/commands/nativedisk.c b/grub-core/commands/nativedisk.c +index 580c8d3b0..a2c766fbd 100644 +--- a/grub-core/commands/nativedisk.c ++++ b/grub-core/commands/nativedisk.c +@@ -78,6 +78,7 @@ get_uuid (const char *name, char **uuid, int getnative) +     case GRUB_DISK_DEVICE_ATA_ID: +     case GRUB_DISK_DEVICE_SCSI_ID: +     case GRUB_DISK_DEVICE_XEN: ++    case GRUB_DISK_DEVICE_NVME_ID: +       if (getnative) + 	break; +       /* FALLTHROUGH */ +diff --git a/grub-core/disk/nvme-int.h b/grub-core/disk/nvme-int.h +new file mode 100644 +index 000000000..1295b58aa +--- /dev/null ++++ b/grub-core/disk/nvme-int.h +@@ -0,0 +1,208 @@ ++// NVMe datastructures and constants ++// ++// Copyright 2017 Amazon.com, Inc. or its affiliates. ++// ++// This file may be distributed under the terms of the GNU LGPLv3 license. ++ ++#ifndef __NVME_INT_H ++#define __NVME_INT_H ++ ++#include <grub/types.h> ++ ++/* Data structures */ ++ ++/* The register file of a NVMe host controller. This struct follows the naming ++   scheme in the NVMe specification. */ ++struct nvme_reg { ++    grub_uint64_t cap;                    /* controller capabilities */ ++    grub_uint32_t vs;                     /* version */ ++    grub_uint32_t intms;                  /* interrupt mask set */ ++    grub_uint32_t intmc;                  /* interrupt mask clear */ ++    grub_uint32_t cc;                     /* controller configuration */ ++    grub_uint32_t _res0; ++    grub_uint32_t csts;                   /* controller status */ ++    grub_uint32_t _res1; ++    grub_uint32_t aqa;                    /* admin queue attributes */ ++    grub_uint64_t asq;                    /* admin submission queue base address */ ++    grub_uint64_t acq;                    /* admin completion queue base address */ ++}; ++ ++/* Submission queue entry */ ++struct nvme_sqe { ++    union { ++        grub_uint32_t dword[16]; ++        struct { ++            grub_uint32_t cdw0;           /* Command DWORD 0 */ ++            grub_uint32_t nsid;           /* Namespace ID */ ++            grub_uint64_t _res0; ++            grub_uint64_t mptr;           /* metadata ptr */ ++ ++            grub_uint64_t dptr_prp1; ++            grub_uint64_t dptr_prp2; ++        }; ++    }; ++}; ++ ++/* Completion queue entry */ ++struct nvme_cqe { ++    union { ++        grub_uint32_t dword[4]; ++        struct { ++            grub_uint32_t cdw0; ++            grub_uint32_t _res0; ++            grub_uint16_t sq_head; ++            grub_uint16_t sq_id; ++            grub_uint16_t cid; ++            grub_uint16_t status; ++        }; ++    }; ++}; ++ ++/* The common part of every submission or completion queue. */ ++struct nvme_queue { ++    grub_uint32_t *dbl;                   /* doorbell */ ++    grub_uint16_t mask;                   /* length - 1 */ ++}; ++ ++struct nvme_cq { ++    struct nvme_queue common; ++    struct nvme_cqe *cqe; ++ ++    /* We have read upto (but not including) this entry in the queue. */ ++    grub_uint16_t head; ++ ++    /* The current phase bit the controller uses to indicate that it has written ++       a new entry. This is inverted after each wrap. */ ++    unsigned phase : 1; ++}; ++ ++struct nvme_sq { ++    struct nvme_queue common; ++    struct nvme_sqe *sqe; ++ ++    /* Corresponding completion queue. We only support a single SQ per CQ. */ ++    struct nvme_cq *cq; ++ ++    /* The last entry the controller has fetched. */ ++    grub_uint16_t head; ++ ++    /* The last value we have written to the tail doorbell. */ ++    grub_uint16_t tail; ++}; ++ ++struct nvme_ctrl { ++    grub_pci_device_t pci; ++    struct nvme_reg volatile *reg; ++ ++    grub_uint32_t ctrlnum; ++ ++    grub_uint32_t doorbell_stride;        /* in bytes */ ++ ++    struct nvme_sq admin_sq; ++    struct nvme_cq admin_cq; ++ ++    grub_uint32_t ns_count; ++ ++    struct nvme_sq io_sq; ++    struct nvme_cq io_cq; ++}; ++ ++struct nvme_namespace { ++    struct nvme_namespace *next; ++    struct nvme_namespace **prev; ++ ++    char *devname; ++ ++    grub_uint32_t nsnum; ++ ++    struct nvme_ctrl *ctrl; ++ ++    grub_uint32_t ns_id; ++ ++    grub_uint64_t lba_count;              /* The total amount of sectors. */ ++ ++    grub_uint32_t block_size; ++    grub_uint32_t metadata_size; ++    grub_uint32_t max_req_size; ++}; ++ ++/* Data structures for NVMe admin identify commands */ ++ ++struct nvme_identify_ctrl { ++    grub_uint16_t vid; ++    grub_uint16_t ssvid; ++    char sn[20]; ++    char mn[40]; ++    char fr[8]; ++ ++    grub_uint8_t rab; ++    grub_uint8_t ieee[3]; ++    grub_uint8_t cmic; ++    grub_uint8_t mdts; ++ ++    char _boring[516 - 78]; ++ ++    grub_uint32_t nn;                     /* number of namespaces */ ++}; ++ ++struct nvme_identify_ns_list { ++    grub_uint32_t ns_id[1024]; ++}; ++ ++struct nvme_lba_format { ++    grub_uint16_t ms; ++    grub_uint8_t  lbads; ++    grub_uint8_t  rp; ++}; ++ ++struct nvme_identify_ns { ++    grub_uint64_t nsze; ++    grub_uint64_t ncap; ++    grub_uint64_t nuse; ++    grub_uint8_t  nsfeat; ++    grub_uint8_t  nlbaf; ++    grub_uint8_t  flbas; ++ ++    char _boring[128 - 27]; ++ ++    struct nvme_lba_format lbaf[16]; ++}; ++ ++union nvme_identify { ++    struct nvme_identify_ns      ns; ++    struct nvme_identify_ctrl    ctrl; ++    struct nvme_identify_ns_list ns_list; ++}; ++ ++/* NVMe constants */ ++ ++#define NVME_CAP_CSS_NVME (1ULL << 37) ++ ++#define NVME_CSTS_FATAL   (1U <<  1) ++#define NVME_CSTS_RDY     (1U <<  0) ++ ++#define NVME_CC_EN        (1U <<  0) ++ ++#define NVME_SQE_OPC_ADMIN_CREATE_IO_SQ 1U ++#define NVME_SQE_OPC_ADMIN_CREATE_IO_CQ 5U ++#define NVME_SQE_OPC_ADMIN_IDENTIFY     6U ++ ++#define NVME_SQE_OPC_IO_WRITE 1U ++#define NVME_SQE_OPC_IO_READ  2U ++ ++#define NVME_ADMIN_IDENTIFY_CNS_ID_NS       0U ++#define NVME_ADMIN_IDENTIFY_CNS_ID_CTRL     1U ++#define NVME_ADMIN_IDENTIFY_CNS_GET_NS_LIST 2U ++ ++#define NVME_CQE_DW3_P (1U << 16) ++ ++#define NVME_PAGE_SIZE 4096 ++#define NVME_PAGE_MASK ~(NVME_PAGE_SIZE - 1) ++ ++/* Length for the queue entries. */ ++#define NVME_SQE_SIZE_LOG 6 ++#define NVME_CQE_SIZE_LOG 4 ++ ++#endif ++ ++/* EOF */ +diff --git a/grub-core/disk/nvme.c b/grub-core/disk/nvme.c +new file mode 100644 +index 000000000..093237c70 +--- /dev/null ++++ b/grub-core/disk/nvme.c +@@ -0,0 +1,781 @@ ++// Low level NVMe disk access ++// ++// Based on SeaBIOS NVMe driver - Copyright 2017 Amazon.com, Inc. or its affiliates. ++// Port to GRUB2 done by Mate Kukri ++// ++// This file may be distributed under the terms of the GNU LGPLv3 license. ++ ++#include <grub/disk.h> ++#include <grub/dl.h> ++#include <grub/pci.h> ++#include "nvme-int.h" ++ ++GRUB_MOD_LICENSE ("GPLv3"); /* LGPLv3 in reality but it is GPLv3 compatible */ ++ ++static grub_uint32_t grub_nvme_ctrlcnt; ++static grub_uint32_t grub_nvme_nscnt; ++ ++static struct nvme_namespace *grub_nvme_namespaces; ++ ++// Page aligned "dma bounce buffer" of size NVME_PAGE_SIZE ++static void *nvme_dma_buffer; ++ ++static void * ++zalloc_page_aligned(grub_uint32_t size) ++{ ++    void *res = grub_memalign(NVME_PAGE_SIZE, size); ++    if (res) grub_memset(res, 0, size); ++    return res; ++} ++ ++static void ++nvme_init_queue_common(struct nvme_ctrl *ctrl, struct nvme_queue *q, grub_uint16_t q_idx, ++                       grub_uint16_t length) ++{ ++    grub_memset(q, 0, sizeof(*q)); ++    q->dbl = (grub_uint32_t *)((char *)ctrl->reg + 0x1000 + q_idx * ctrl->doorbell_stride); ++    grub_dprintf("nvme", " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl); ++    q->mask = length - 1; ++} ++ ++static int ++nvme_init_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, grub_uint16_t length, ++             struct nvme_cq *cq) ++{ ++    nvme_init_queue_common(ctrl, &sq->common, q_idx, length); ++    sq->sqe = zalloc_page_aligned(sizeof(*sq->sqe) * length); ++ ++    if (!sq->sqe) { ++        return -1; ++    } ++ ++    grub_dprintf("nvme", "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe); ++    sq->cq   = cq; ++    sq->head = 0; ++    sq->tail = 0; ++ ++    return 0; ++} ++ ++static int ++nvme_init_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx, grub_uint16_t length) ++{ ++    nvme_init_queue_common(ctrl, &cq->common, q_idx, length); ++    cq->cqe = zalloc_page_aligned(sizeof(*cq->cqe) * length); ++    if (!cq->cqe) { ++        return -1; ++    } ++ ++    cq->head = 0; ++ ++    /* All CQE phase bits are initialized to zero. This means initially we wait ++       for the host controller to set these to 1. */ ++    cq->phase = 1; ++ ++    return 0; ++} ++ ++static int ++nvme_poll_cq(struct nvme_cq *cq) ++{ ++    grub_uint32_t dw3 = *(volatile grub_uint32_t *) &cq->cqe[cq->head].dword[3]; ++    return (!!(dw3 & NVME_CQE_DW3_P) == cq->phase); ++} ++ ++static int ++nvme_is_cqe_success(struct nvme_cqe const *cqe) ++{ ++    return ((cqe->status >> 1) & 0xFF) == 0; ++} ++ ++static struct nvme_cqe ++nvme_error_cqe(void) ++{ ++    struct nvme_cqe r; ++ ++    /* 0xFF is a vendor specific status code != success. Should be okay for ++       indicating failure. */ ++    grub_memset(&r, 0xFF, sizeof(r)); ++    return r; ++} ++ ++static struct nvme_cqe ++nvme_consume_cqe(struct nvme_sq *sq) ++{ ++    struct nvme_cq *cq = sq->cq; ++ ++    if (!nvme_poll_cq(cq)) { ++        /* Cannot consume a completion queue entry, if there is none ready. */ ++        return nvme_error_cqe(); ++    } ++ ++    struct nvme_cqe *cqe = &cq->cqe[cq->head]; ++    grub_uint16_t cq_next_head = (cq->head + 1) & cq->common.mask; ++    grub_dprintf("nvme", "cq %p head %u -> %u\n", cq, cq->head, cq_next_head); ++    if (cq_next_head < cq->head) { ++        grub_dprintf("nvme", "cq %p wrap\n", cq); ++        cq->phase = ~cq->phase; ++    } ++    cq->head = cq_next_head; ++ ++    /* Update the submission queue head. */ ++    if (cqe->sq_head != sq->head) { ++        sq->head = cqe->sq_head; ++        grub_dprintf("nvme", "sq %p advanced to %u\n", sq, cqe->sq_head); ++    } ++ ++    /* Tell the controller that we consumed the completion. */ ++    *(volatile grub_uint32_t *) cq->common.dbl = cq->head; ++ ++    return *cqe; ++} ++ ++static struct nvme_cqe ++nvme_wait(struct nvme_sq *sq) ++{ ++    // static const unsigned nvme_timeout = 5000 /* ms */; ++    // grub_uint32_t to = timer_calc(nvme_timeout); ++    while (!nvme_poll_cq(sq->cq)) { ++        /* FIXME ++        yield(); ++ ++        if (timer_check(to)) { ++            warn_timeout(); ++            return nvme_error_cqe(); ++        }*/ ++    } ++ ++    return nvme_consume_cqe(sq); ++} ++ ++/* Returns the next submission queue entry (or NULL if the queue is full). It ++   also fills out Command Dword 0 and clears the rest. */ ++static struct nvme_sqe * ++nvme_get_next_sqe(struct nvme_sq *sq, grub_uint8_t opc, void *metadata, void *data, void *data2) ++{ ++    if (((sq->head + 1) & sq->common.mask) == sq->tail) { ++        grub_dprintf("nvme", "submission queue is full\n"); ++        return NULL; ++    } ++ ++    struct nvme_sqe *sqe = &sq->sqe[sq->tail]; ++    grub_dprintf("nvme", "sq %p next_sqe %u\n", sq, sq->tail); ++ ++    grub_memset(sqe, 0, sizeof(*sqe)); ++    sqe->cdw0 = opc | (sq->tail << 16 /* CID */); ++    sqe->mptr = (grub_uint32_t)metadata; ++    sqe->dptr_prp1 = (grub_uint32_t)data; ++    sqe->dptr_prp2 = (grub_uint32_t)data2; ++ ++    return sqe; ++} ++ ++/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */ ++static void ++nvme_commit_sqe(struct nvme_sq *sq) ++{ ++    grub_dprintf("nvme", "sq %p commit_sqe %u\n", sq, sq->tail); ++    sq->tail = (sq->tail + 1) & sq->common.mask; ++    *(volatile grub_uint32_t *) sq->common.dbl = sq->tail; ++} ++ ++/* Perform an identify command on the admin queue and return the resulting ++   buffer. This may be a NULL pointer, if something failed. This function ++   cannot be used after initialization, because it uses buffers in tmp zone. */ ++static union nvme_identify * ++nvme_admin_identify(struct nvme_ctrl *ctrl, grub_uint8_t cns, grub_uint32_t nsid) ++{ ++    union nvme_identify *identify_buf = zalloc_page_aligned(4096); ++    if (!identify_buf) ++        return NULL; ++ ++    struct nvme_sqe *cmd_identify; ++    cmd_identify = nvme_get_next_sqe(&ctrl->admin_sq, ++                                     NVME_SQE_OPC_ADMIN_IDENTIFY, NULL, ++                                     identify_buf, NULL); ++    if (!cmd_identify) ++        goto error; ++ ++    cmd_identify->nsid = nsid; ++    cmd_identify->dword[10] = cns; ++ ++    nvme_commit_sqe(&ctrl->admin_sq); ++ ++    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq); ++ ++    if (!nvme_is_cqe_success(&cqe)) { ++        goto error; ++    } ++ ++    return identify_buf; ++ error: ++    grub_free(identify_buf); ++    return NULL; ++} ++ ++static struct nvme_identify_ctrl * ++nvme_admin_identify_ctrl(struct nvme_ctrl *ctrl) ++{ ++    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, 0)->ctrl; ++} ++ ++static struct nvme_identify_ns * ++nvme_admin_identify_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_id) ++{ ++    return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_NS, ++                                ns_id)->ns; ++} ++ ++static void ++nvme_probe_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_idx, grub_uint8_t mdts) ++{ ++    grub_uint32_t ns_id = ns_idx + 1; ++ ++    struct nvme_identify_ns *id = nvme_admin_identify_ns(ctrl, ns_id); ++    if (!id) { ++        grub_dprintf("nvme", "NVMe couldn't identify namespace %u.\n", ns_id); ++        goto free_buffer; ++    } ++ ++    grub_uint8_t current_lba_format = id->flbas & 0xF; ++    if (current_lba_format > id->nlbaf) { ++        grub_dprintf("nvme", "NVMe NS %u: current LBA format %u is beyond what the " ++                " namespace supports (%u)?\n", ++                ns_id, current_lba_format, id->nlbaf + 1); ++        goto free_buffer; ++    } ++ ++    if (!id->nsze) { ++        grub_dprintf("nvme", "NVMe NS %u is inactive.\n", ns_id); ++        goto free_buffer; ++    } ++ ++    if (!nvme_dma_buffer) { ++        nvme_dma_buffer = zalloc_page_aligned(NVME_PAGE_SIZE); ++        if (!nvme_dma_buffer) { ++            goto free_buffer; ++        } ++    } ++ ++    struct nvme_namespace *ns = grub_malloc(sizeof(*ns)); ++    if (!ns) { ++        goto free_buffer; ++    } ++    grub_memset(ns, 0, sizeof(*ns)); ++    ns->ctrl  = ctrl; ++    ns->ns_id = ns_id; ++    ns->lba_count = id->nsze; ++ ++    struct nvme_lba_format *fmt = &id->lbaf[current_lba_format]; ++ ++    ns->block_size    = 1U << fmt->lbads; ++    ns->metadata_size = fmt->ms; ++ ++    if (ns->block_size > NVME_PAGE_SIZE) { ++        /* If we see devices that trigger this path, we need to increase our ++           buffer size. */ ++        grub_free(ns); ++        goto free_buffer; ++    } ++ ++    if (mdts) { ++        ns->max_req_size = ((1U << mdts) * NVME_PAGE_SIZE) / ns->block_size; ++        grub_dprintf("nvme", "NVME NS %u max request size: %d sectors\n", ++                ns_id, ns->max_req_size); ++    } else { ++        ns->max_req_size = -1U; ++    } ++ ++    ns->devname = grub_xasprintf("nvme%un%u", ctrl->ctrlnum, ns_id); ++    ns->nsnum = grub_nvme_nscnt++; ++ ++    grub_list_push (GRUB_AS_LIST_P (&grub_nvme_namespaces), GRUB_AS_LIST (ns)); ++ ++free_buffer: ++    grub_free(id); ++} ++ ++ ++/* Release memory allocated for a completion queue */ ++static void ++nvme_destroy_cq(struct nvme_cq *cq) ++{ ++    grub_free(cq->cqe); ++    cq->cqe = NULL; ++} ++ ++/* Release memory allocated for a submission queue */ ++static void ++nvme_destroy_sq(struct nvme_sq *sq) ++{ ++    grub_free(sq->sqe); ++    sq->sqe = NULL; ++} ++ ++/* Returns 0 on success. */ ++static int ++nvme_create_io_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx) ++{ ++    int rc; ++    struct nvme_sqe *cmd_create_cq; ++    grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff); ++    if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe)) ++        length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe); ++ ++    rc = nvme_init_cq(ctrl, cq, q_idx, length); ++    if (rc) { ++        goto err; ++    } ++ ++    cmd_create_cq = nvme_get_next_sqe(&ctrl->admin_sq, ++                                      NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, NULL, ++                                      cq->cqe, NULL); ++    if (!cmd_create_cq) { ++        goto err_destroy_cq; ++    } ++ ++    cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1); ++    cmd_create_cq->dword[11] = 1 /* physically contiguous */; ++ ++    nvme_commit_sqe(&ctrl->admin_sq); ++ ++    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq); ++ ++    if (!nvme_is_cqe_success(&cqe)) { ++        grub_dprintf("nvme", "create io cq failed: %08x %08x %08x %08x\n", ++                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]); ++ ++        goto err_destroy_cq; ++    } ++ ++    return 0; ++ ++err_destroy_cq: ++    nvme_destroy_cq(cq); ++err: ++    return -1; ++} ++ ++/* Returns 0 on success. */ ++static int ++nvme_create_io_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, struct nvme_cq *cq) ++{ ++    int rc; ++    struct nvme_sqe *cmd_create_sq; ++    grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff); ++    if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe)) ++        length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe); ++ ++    rc = nvme_init_sq(ctrl, sq, q_idx, length, cq); ++    if (rc) { ++        goto err; ++    } ++ ++    cmd_create_sq = nvme_get_next_sqe(&ctrl->admin_sq, ++                                      NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, NULL, ++                                      sq->sqe, NULL); ++    if (!cmd_create_sq) { ++        goto err_destroy_sq; ++    } ++ ++    cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1); ++    cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */; ++    grub_dprintf("nvme", "sq %p create dword10 %08x dword11 %08x\n", sq, ++            cmd_create_sq->dword[10], cmd_create_sq->dword[11]); ++ ++    nvme_commit_sqe(&ctrl->admin_sq); ++ ++    struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq); ++ ++    if (!nvme_is_cqe_success(&cqe)) { ++        grub_dprintf("nvme", "create io sq failed: %08x %08x %08x %08x\n", ++                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]); ++        goto err_destroy_sq; ++    } ++ ++    return 0; ++ ++err_destroy_sq: ++    nvme_destroy_sq(sq); ++err: ++    return -1; ++} ++ ++/* Reads count sectors into buf. The buffer cannot cross page boundaries. */ ++static int ++nvme_io_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *prp1, void *prp2, ++             grub_uint16_t count, int write) ++{ ++    if (((grub_uint32_t)prp1 & 0x3) || ((grub_uint32_t)prp2 & 0x3)) { ++        /* Buffer is misaligned */ ++        return -1; ++    } ++ ++    struct nvme_sqe *io_read = nvme_get_next_sqe(&ns->ctrl->io_sq, ++                                                 write ? NVME_SQE_OPC_IO_WRITE ++                                                       : NVME_SQE_OPC_IO_READ, ++                                                 NULL, prp1, prp2); ++    io_read->nsid = ns->ns_id; ++    io_read->dword[10] = (grub_uint32_t)lba; ++    io_read->dword[11] = (grub_uint32_t)(lba >> 32); ++    io_read->dword[12] = (1U << 31 /* limited retry */) | (count - 1); ++ ++    nvme_commit_sqe(&ns->ctrl->io_sq); ++ ++    struct nvme_cqe cqe = nvme_wait(&ns->ctrl->io_sq); ++ ++    if (!nvme_is_cqe_success(&cqe)) { ++        grub_dprintf("nvme", "read io: %08x %08x %08x %08x\n", ++                cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]); ++ ++        return -1; ++    } ++ ++    grub_dprintf("nvme", "ns %u %s lba %llu+%u\n", ns->ns_id, write ? "write" : "read", ++            lba, count); ++    return count; ++} ++ ++// Transfer up to one page of data using the internal dma bounce buffer ++static int ++nvme_bounce_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count, ++                 int write) ++{ ++    grub_uint16_t const max_blocks = NVME_PAGE_SIZE / ns->block_size; ++    grub_uint16_t blocks = count < max_blocks ? count : max_blocks; ++ ++    if (write) ++        grub_memcpy(nvme_dma_buffer, buf, blocks * ns->block_size); ++ ++    int res = nvme_io_xfer(ns, lba, nvme_dma_buffer, NULL, blocks, write); ++ ++    if (!write && res >= 0) ++        grub_memcpy(buf, nvme_dma_buffer, res * ns->block_size); ++ ++    return res; ++} ++ ++#define NVME_MAX_PRPL_ENTRIES 15 /* Allows requests up to 64kb */ ++ ++// Transfer data using page list (if applicable) ++static int ++nvme_prpl_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count, ++               int write) ++{ ++    grub_uint32_t base = (long)buf; ++    grub_int32_t size; ++ ++    if (count > ns->max_req_size) ++        count = ns->max_req_size; ++ ++    size = count * ns->block_size; ++    /* Special case for transfers that fit into PRP1, but are unaligned */ ++    if (((size + (base & ~NVME_PAGE_MASK)) <= NVME_PAGE_SIZE)) ++        goto single; ++ ++    /* Every request has to be page aligned */ ++    if (base & ~NVME_PAGE_MASK) ++        goto bounce; ++ ++    /* Make sure a full block fits into the last chunk */ ++    if (size & (ns->block_size - 1ULL)) ++        goto bounce; ++ ++    /* Build PRP list if we need to describe more than 2 pages */ ++    if ((ns->block_size * count) > (NVME_PAGE_SIZE * 2)) { ++        grub_uint32_t prpl_len = 0; ++        grub_uint64_t *prpl = nvme_dma_buffer; ++        int first_page = 1; ++        for (; size > 0; base += NVME_PAGE_SIZE, size -= NVME_PAGE_SIZE) { ++            if (first_page) { ++                /* First page is special */ ++                first_page = 0; ++                continue; ++            } ++            if (prpl_len >= NVME_MAX_PRPL_ENTRIES) ++                goto bounce; ++            prpl[prpl_len++] = base; ++        } ++        return nvme_io_xfer(ns, lba, buf, prpl, count, write); ++    } ++ ++    /* Directly embed the 2nd page if we only need 2 pages */ ++    if ((ns->block_size * count) > NVME_PAGE_SIZE) ++        return nvme_io_xfer(ns, lba, buf, (char *) buf + NVME_PAGE_SIZE, count, write); ++ ++single: ++    /* One page is enough, don't expose anything else */ ++    return nvme_io_xfer(ns, lba, buf, NULL, count, write); ++ ++bounce: ++    /* Use bounce buffer to make transfer */ ++    return nvme_bounce_xfer(ns, lba, buf, count, write); ++} ++ ++static int ++nvme_create_io_queues(struct nvme_ctrl *ctrl) ++{ ++    if (nvme_create_io_cq(ctrl, &ctrl->io_cq, 3)) ++        goto err; ++ ++    if (nvme_create_io_sq(ctrl, &ctrl->io_sq, 2, &ctrl->io_cq)) ++        goto err_free_cq; ++ ++    return 0; ++ ++ err_free_cq: ++    nvme_destroy_cq(&ctrl->io_cq); ++ err: ++    return -1; ++} ++ ++/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */ ++static int ++nvme_wait_csts_rdy(struct nvme_ctrl *ctrl, unsigned rdy) ++{ ++    // grub_uint32_t const max_to = 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU); ++    // grub_uint32_t to = timer_calc(max_to); ++    grub_uint32_t csts; ++ ++    while (rdy != ((csts = ctrl->reg->csts) & NVME_CSTS_RDY)) { ++        // FIXME ++        //yield(); ++ ++        if (csts & NVME_CSTS_FATAL) { ++            grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n"); ++            return -1; ++        } ++ ++        /* ++        if (timer_check(to)) { ++            warn_timeout(); ++            return -1; ++        }*/ ++    } ++ ++    return 0; ++} ++ ++/* Returns 0 on success. */ ++static int grub_nvme_controller_enable(struct nvme_ctrl *ctrl) ++{ ++    grub_pci_address_t addr; ++    int rc; ++ ++    addr = grub_pci_make_address (ctrl->pci, GRUB_PCI_REG_COMMAND); ++    grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_BUS_MASTER); ++ ++    /* Turn the controller off. */ ++    ctrl->reg->cc = 0; ++    if (nvme_wait_csts_rdy(ctrl, 0)) { ++        grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n"); ++        return -1; ++    } ++ ++    ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF); ++ ++    rc = nvme_init_cq(ctrl, &ctrl->admin_cq, 1, ++                      NVME_PAGE_SIZE / sizeof(struct nvme_cqe)); ++    if (rc) { ++        return -1; ++    } ++ ++    rc = nvme_init_sq(ctrl, &ctrl->admin_sq, 0, ++                      NVME_PAGE_SIZE / sizeof(struct nvme_sqe), &ctrl->admin_cq); ++    if (rc) { ++        goto err_destroy_admin_cq; ++    } ++ ++    ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16 ++        | ctrl->admin_sq.common.mask; ++ ++    ctrl->reg->asq = (grub_uint32_t)ctrl->admin_sq.sqe; ++    ctrl->reg->acq = (grub_uint32_t)ctrl->admin_cq.cqe; ++ ++    grub_dprintf("nvme", "  admin submission queue: %p\n", ctrl->admin_sq.sqe); ++    grub_dprintf("nvme", "  admin completion queue: %p\n", ctrl->admin_cq.cqe); ++ ++    ctrl->reg->cc = NVME_CC_EN | (NVME_CQE_SIZE_LOG << 20) ++        | (NVME_SQE_SIZE_LOG << 16 /* IOSQES */); ++ ++    if (nvme_wait_csts_rdy(ctrl, 1)) { ++        grub_dprintf("nvme", "NVMe fatal error while enabling controller\n"); ++        goto err_destroy_admin_sq; ++    } ++ ++    /* The admin queue is set up and the controller is ready. Let's figure out ++       what namespaces we have. */ ++ ++    struct nvme_identify_ctrl *identify = nvme_admin_identify_ctrl(ctrl); ++ ++    if (!identify) { ++        grub_dprintf("nvme", "NVMe couldn't identify controller.\n"); ++        goto err_destroy_admin_sq; ++    } ++ ++    grub_dprintf("nvme", "NVMe has %u namespace%s.\n", ++            identify->nn, (identify->nn == 1) ? "" : "s"); ++ ++    ctrl->ns_count = identify->nn; ++    grub_uint8_t mdts = identify->mdts; ++    grub_free(identify); ++ ++    if ((ctrl->ns_count == 0) || nvme_create_io_queues(ctrl)) { ++        /* No point to continue, if the controller says it doesn't have ++           namespaces or we couldn't create I/O queues. */ ++        goto err_destroy_admin_sq; ++    } ++ ++    /* Give the controller a global number */ ++    ctrl->ctrlnum = grub_nvme_ctrlcnt++; ++ ++    /* Populate namespace IDs */ ++    for (grub_uint32_t ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) { ++        nvme_probe_ns(ctrl, ns_idx, mdts); ++    } ++ ++    grub_dprintf("nvme", "NVMe initialization complete!\n"); ++    return 0; ++ ++ err_destroy_admin_sq: ++    nvme_destroy_sq(&ctrl->admin_sq); ++ err_destroy_admin_cq: ++    nvme_destroy_cq(&ctrl->admin_cq); ++    return -1; ++} ++ ++static int grub_nvme_pci_probe(grub_pci_device_t dev, grub_pci_id_t pciid __attribute__ ((unused)), void *data __attribute__ ((unused))) ++{ ++    grub_pci_address_t addr; ++    grub_uint32_t class, bar, version; ++    struct nvme_reg volatile *reg; ++ ++    class = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_CLASS)); ++    if (class >> 16 != 0x0108) ++        return 0; ++    if ((class >> 8 & 0xff) != 2) { /* as of NVM 1.0e */ ++        grub_dprintf("nvme", "Found incompatble NVMe: prog-if=%02x\n", class >> 8 & 0xff); ++        return 0; ++    } ++ ++    bar = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_ADDRESS_REG0)); ++    reg = grub_pci_device_map_range (dev, bar & GRUB_PCI_ADDR_MEM_MASK, sizeof (*reg)); ++ ++    addr = grub_pci_make_address (dev, GRUB_PCI_REG_COMMAND); ++    grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_MEM_ENABLED); ++ ++    version = reg->vs; ++    grub_dprintf("nvme", "Found NVMe controller with version %u.%u.%u.\n", version >> 16, (version >> 8) & 0xFF, version & 0xFF); ++    grub_dprintf("nvme", "  Capabilities %016llx\n", reg->cap); ++ ++    if (~reg->cap & NVME_CAP_CSS_NVME) { ++        grub_dprintf("nvme", "Controller doesn't speak NVMe command set. Skipping.\n"); ++        goto err; ++    } ++ ++    struct nvme_ctrl *ctrl = grub_malloc(sizeof(*ctrl)); ++    if (!ctrl) ++        goto err; ++ ++    grub_memset(ctrl, 0, sizeof(*ctrl)); ++ ++    ctrl->reg = reg; ++    ctrl->pci = dev; ++ ++    if (grub_nvme_controller_enable(ctrl)) ++        goto err_free_ctrl; ++ ++    return 0; ++ ++ err_free_ctrl: ++    grub_free(ctrl); ++ err: ++    grub_dprintf("nvme", "Failed to enable NVMe controller.\n"); ++    return 0; ++} ++ ++static int ++grub_nvme_iterate (grub_disk_dev_iterate_hook_t hook, void *hook_data, grub_disk_pull_t pull) ++{ ++  struct nvme_namespace *ns; ++ ++  if (pull != GRUB_DISK_PULL_NONE) ++    return 0; ++ ++  FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces) ++    if (hook (ns->devname, hook_data)) ++      return 1; ++ ++  return 0; ++} ++ ++static grub_err_t ++grub_nvme_open (const char *name __attribute ((unused)), grub_disk_t disk __attribute ((unused))) ++{ ++  struct nvme_namespace *ns; ++ ++  FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces) ++    if (grub_strcmp (ns->devname, name) == 0) ++      break; ++ ++  if (! ns) ++    return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device"); ++ ++  disk->total_sectors = ns->lba_count; ++  disk->max_agglomerate = ns->max_req_size; ++ ++  disk->id = ns->nsnum; /* global id of the namespace */ ++ ++  disk->data = ns; ++ ++  return 0; ++} ++ ++static grub_err_t ++nvme_readwrite(struct nvme_namespace *ns, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf, int write) ++{ ++    for (int i = 0; i < num_sectors;) { ++        grub_uint16_t blocks_remaining = num_sectors - i; ++        char *op_buf = buf + i * ns->block_size; ++        int blocks = nvme_prpl_xfer(ns, sector + i, op_buf, blocks_remaining, write); ++        if (blocks < 0) ++            return GRUB_ERR_IO; ++        i += blocks; ++    } ++    return GRUB_ERR_NONE; ++} ++ ++static grub_err_t ++grub_nvme_read (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf) ++{ ++  return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 0); ++} ++ ++static grub_err_t ++grub_nvme_write (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, const char *buf) ++{ ++  return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 1); ++} ++ ++static struct grub_disk_dev grub_nvme_dev = ++  { ++    .name = "nvme", ++    .id = GRUB_DISK_DEVICE_NVME_ID, ++    .disk_iterate = grub_nvme_iterate, ++    .disk_open = grub_nvme_open, ++    .disk_read = grub_nvme_read, ++    .disk_write = grub_nvme_write, ++    .next = 0 ++  }; ++ ++GRUB_MOD_INIT(nvme) ++{ ++  grub_stop_disk_firmware (); ++  grub_pci_iterate (grub_nvme_pci_probe, NULL); ++  grub_disk_dev_register (&grub_nvme_dev); ++} ++ ++GRUB_MOD_FINI(nvme) ++{ ++  grub_disk_dev_unregister (&grub_nvme_dev); ++} +diff --git a/include/grub/disk.h b/include/grub/disk.h +index fbf23df7f..186e76f0b 100644 +--- a/include/grub/disk.h ++++ b/include/grub/disk.h +@@ -52,6 +52,7 @@ enum grub_disk_dev_id +     GRUB_DISK_DEVICE_UBOOTDISK_ID, +     GRUB_DISK_DEVICE_XEN, +     GRUB_DISK_DEVICE_OBDISK_ID, ++    GRUB_DISK_DEVICE_NVME_ID +   }; +  + struct grub_disk; +--  +2.39.2 + | 
