summaryrefslogtreecommitdiff
path: root/resources/coreboot/haswell
diff options
context:
space:
mode:
authorLeah Rowe <leah@libreboot.org>2023-03-18 00:36:27 +0000
committerLeah Rowe <leah@libreboot.org>2023-03-18 00:55:10 +0000
commit548872ce8e84fe10d52417acab9b3cf886821386 (patch)
treeb549ad51c9c693a323f0f8edc9a11eab92220013 /resources/coreboot/haswell
parenta942bd6590dd450140fc0af8549ca470a065adf5 (diff)
haswell boards: use libre mrc.bin replacement
Courtesy of Angel Pons from the coreboot project. This uses the following patch set from Gerrit, as yet unmerged (in coreboot master) on this date: https://review.coreboot.org/c/coreboot/+/64198/5 . Logic for downloading MRC blobs has been deleted from lbmk, as it is now completely obsolete (for Haswell boards). If other platforms that need mrc.bin are added later, that logic will be re-added.
Diffstat (limited to 'resources/coreboot/haswell')
-rw-r--r--resources/coreboot/haswell/board.cfg4
-rw-r--r--resources/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch54
-rw-r--r--resources/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch143
-rw-r--r--resources/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch615
-rw-r--r--resources/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch148
-rw-r--r--resources/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch783
-rw-r--r--resources/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch128
-rw-r--r--resources/coreboot/haswell/patches/0007-sb-intel-lynxpoint-Add-native-PCH-init.patch785
-rw-r--r--resources/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch407
-rw-r--r--resources/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch57
-rw-r--r--resources/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch344
-rw-r--r--resources/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch346
-rw-r--r--resources/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch249
-rw-r--r--resources/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch1593
-rw-r--r--resources/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch541
-rw-r--r--resources/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch263
-rw-r--r--resources/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch1038
-rw-r--r--resources/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch384
-rw-r--r--resources/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch1128
-rw-r--r--resources/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch222
-rw-r--r--resources/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch294
-rw-r--r--resources/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch708
-rw-r--r--resources/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch272
-rw-r--r--resources/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch331
-rw-r--r--resources/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch688
-rw-r--r--resources/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch570
-rw-r--r--resources/coreboot/haswell/patches/0026-specifically-use-python3-in-scripts.patch36
27 files changed, 12131 insertions, 0 deletions
diff --git a/resources/coreboot/haswell/board.cfg b/resources/coreboot/haswell/board.cfg
new file mode 100644
index 00000000..423b7d0b
--- /dev/null
+++ b/resources/coreboot/haswell/board.cfg
@@ -0,0 +1,4 @@
+cbtree="haswell"
+romtype="normal"
+cbrevision="1411ecf6f0b2c7395bcb96b856dcfdddb1b0c81b"
+arch="x86_64"
diff --git a/resources/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch b/resources/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch
new file mode 100644
index 00000000..96e4c14d
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch
@@ -0,0 +1,54 @@
+From dd58f5e9108bc596c93071705d2b53233d13ade6 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 20:36:10 +0200
+Subject: [PATCH 01/26] commonlib/clamp.h: Add more clamping functions
+
+Add more clamping functions that work with different types.
+
+Change-Id: I14cf335d5a54f769f8fd9184450957e876affd6b
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ src/commonlib/include/commonlib/clamp.h | 26 +++++++++++++++++--------
+ 1 file changed, 18 insertions(+), 8 deletions(-)
+
+diff --git a/src/commonlib/include/commonlib/clamp.h b/src/commonlib/include/commonlib/clamp.h
+index e01a107ed4..526185195c 100644
+--- a/src/commonlib/include/commonlib/clamp.h
++++ b/src/commonlib/include/commonlib/clamp.h
+@@ -8,15 +8,25 @@
+ /*
+ * Clamp a value, so that it is between a lower and an upper bound.
+ */
+-static inline u32 clamp_u32(const u32 min, const u32 val, const u32 max)
+-{
+- if (val > max)
+- return max;
++#define __MAKE_CLAMP_FUNC(type) \
++ static inline type clamp_##type(const type min, const type val, const type max) \
++ { \
++ if (val > max) \
++ return max; \
++ if (val < min) \
++ return min; \
++ return val; \
++ } \
+
+- if (val < min)
+- return min;
++__MAKE_CLAMP_FUNC(s8) /* clamp_s8 */
++__MAKE_CLAMP_FUNC(u8) /* clamp_u8 */
++__MAKE_CLAMP_FUNC(s16) /* clamp_s16 */
++__MAKE_CLAMP_FUNC(u16) /* clamp_u16 */
++__MAKE_CLAMP_FUNC(s32) /* clamp_s32 */
++__MAKE_CLAMP_FUNC(u32) /* clamp_u32 */
++__MAKE_CLAMP_FUNC(s64) /* clamp_s64 */
++__MAKE_CLAMP_FUNC(u64) /* clamp_u64 */
+
+- return val;
+-}
++#undef __MAKE_CLAMP_FUNC
+
+ #endif /* COMMONLIB_CLAMP_H */
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch b/resources/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch
new file mode 100644
index 00000000..35d5c89e
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch
@@ -0,0 +1,143 @@
+From c07391821c32cafea950574b85468f5b3284b6df Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 21:12:14 +0200
+Subject: [PATCH 02/26] nb/intel/haswell: Introduce option to not use MRC.bin
+
+Introduce the `USE_NATIVE_RAMINIT` Kconfig option, which should allow
+booting coreboot on Haswell mainboards without the need of the closed
+source MRC.bin. For now, this option does not work at all; the needed
+magic will be implemented in subsequent commits. Add a config file to
+make sure the newly-introduced option gets build-tested.
+
+Change-Id: I46c77586f9b5771624082e07c60c205e578edd8e
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ configs/config.asrock_b85m_pro4.native_raminit | 5 +++++
+ src/northbridge/intel/haswell/Kconfig | 13 +++++++++++++
+ src/northbridge/intel/haswell/Makefile.inc | 7 ++++++-
+ .../intel/haswell/native_raminit/Makefile.inc | 3 +++
+ .../intel/haswell/native_raminit/raminit_native.c | 15 +++++++++++++++
+ 5 files changed, 42 insertions(+), 1 deletion(-)
+ create mode 100644 configs/config.asrock_b85m_pro4.native_raminit
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/Makefile.inc
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/raminit_native.c
+
+diff --git a/configs/config.asrock_b85m_pro4.native_raminit b/configs/config.asrock_b85m_pro4.native_raminit
+new file mode 100644
+index 0000000000..2de538926f
+--- /dev/null
++++ b/configs/config.asrock_b85m_pro4.native_raminit
+@@ -0,0 +1,5 @@
++# Configuration used to build-test native raminit
++CONFIG_VENDOR_ASROCK=y
++CONFIG_BOARD_ASROCK_B85M_PRO4=y
++CONFIG_USE_NATIVE_RAMINIT=y
++CONFIG_DEBUG_RAM_SETUP=y
+diff --git a/src/northbridge/intel/haswell/Kconfig b/src/northbridge/intel/haswell/Kconfig
+index 50acb09a91..b659bf6d98 100644
+--- a/src/northbridge/intel/haswell/Kconfig
++++ b/src/northbridge/intel/haswell/Kconfig
+@@ -9,6 +9,14 @@ config NORTHBRIDGE_INTEL_HASWELL
+
+ if NORTHBRIDGE_INTEL_HASWELL
+
++config USE_NATIVE_RAMINIT
++ bool "[NOT WORKING] Use native raminit"
++ default n
++ select HAVE_DEBUG_RAM_SETUP
++ help
++ Select if you want to use coreboot implementation of raminit rather than
++ MRC.bin. Currently incomplete and does not boot.
++
+ config HASWELL_VBOOT_IN_BOOTBLOCK
+ depends on VBOOT
+ bool "Start verstage in bootblock"
+@@ -45,6 +53,7 @@ config DCACHE_RAM_BASE
+
+ config DCACHE_RAM_SIZE
+ hex
++ default 0x40000 if USE_NATIVE_RAMINIT
+ default 0x10000
+ help
+ The size of the cache-as-ram region required during bootblock
+@@ -53,12 +62,14 @@ config DCACHE_RAM_SIZE
+
+ config DCACHE_RAM_MRC_VAR_SIZE
+ hex
++ default 0x0 if USE_NATIVE_RAMINIT
+ default 0x30000
+ help
+ The amount of cache-as-ram region required by the reference code.
+
+ config DCACHE_BSP_STACK_SIZE
+ hex
++ default 0x20000 if USE_NATIVE_RAMINIT
+ default 0x2000
+ help
+ The amount of anticipated stack usage in CAR by bootblock and
+@@ -66,6 +77,7 @@ config DCACHE_BSP_STACK_SIZE
+
+ config HAVE_MRC
+ bool "Add a System Agent binary"
++ depends on !USE_NATIVE_RAMINIT
+ help
+ Select this option to add a System Agent binary to
+ the resulting coreboot image.
+@@ -82,6 +94,7 @@ config MRC_FILE
+
+ config HASWELL_HIDE_PEG_FROM_MRC
+ bool "Hide PEG devices from MRC to work around hardcoded MRC behavior"
++ depends on !USE_NATIVE_RAMINIT
+ default y
+ help
+ If set, hides all PEG devices from MRC. This allows the iGPU
+diff --git a/src/northbridge/intel/haswell/Makefile.inc b/src/northbridge/intel/haswell/Makefile.inc
+index 2d1532be05..329f1f7ffe 100644
+--- a/src/northbridge/intel/haswell/Makefile.inc
++++ b/src/northbridge/intel/haswell/Makefile.inc
+@@ -19,6 +19,11 @@ romstage-y += report_platform.c
+
+ postcar-y += memmap.c
+
+-subdirs-y += haswell_mrc
++ifeq ($(CONFIG_USE_NATIVE_RAMINIT),y)
++subdirs-y += native_raminit
++
++else
++subdirs-y += haswell_mrc
++endif
+
+ endif
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+new file mode 100644
+index 0000000000..8cfb4fb33e
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -0,0 +1,3 @@
++## SPDX-License-Identifier: GPL-2.0-or-later
++
++romstage-y += raminit_native.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+new file mode 100644
+index 0000000000..1aafdf8659
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -0,0 +1,15 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <northbridge/intel/haswell/raminit.h>
++
++void perform_raminit(const int s3resume)
++{
++ /*
++ * See, this function's name is a lie. There are more things to
++ * do that memory initialisation, but they are relatively easy.
++ */
++
++ /** TODO: Implement the required magic **/
++ die("NATIVE RAMINIT: More Magic (tm) required.\n");
++}
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch b/resources/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch
new file mode 100644
index 00000000..4e70407c
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch
@@ -0,0 +1,615 @@
+From 6ec71c6df97eded010e96c4ea2bd37cc6a13849d Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 21:56:48 +0200
+Subject: [PATCH 03/26] haswell/lynxpoint: Add native DMI init
+
+Implement native DMI init for Haswell and Lynx Point. This is only
+needed on non-ULT platforms, and only when MRC.bin is not used.
+
+TEST=Verify DMI initialises correctly on Asrock B85M Pro4.
+
+Change-Id: I5fb1a2adc4ffbf0ebbf0d2d3a444055c53765faa
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ src/northbridge/intel/haswell/Makefile.inc | 1 +
+ src/northbridge/intel/haswell/early_dmi.c | 96 ++++++++++++
+ src/northbridge/intel/haswell/early_pcie.c | 121 ++++++++++++++
+ src/northbridge/intel/haswell/haswell.h | 3 +
+ .../haswell/native_raminit/raminit_native.c | 15 ++
+ src/northbridge/intel/haswell/vcu_mailbox.c | 147 ++++++++++++++++++
+ src/northbridge/intel/haswell/vcu_mailbox.h | 16 ++
+ src/southbridge/intel/lynxpoint/Makefile.inc | 2 +
+ .../intel/lynxpoint/early_pch_native.c | 52 +++++++
+ src/southbridge/intel/lynxpoint/pch.h | 20 ++-
+ 10 files changed, 472 insertions(+), 1 deletion(-)
+ create mode 100644 src/northbridge/intel/haswell/early_dmi.c
+ create mode 100644 src/northbridge/intel/haswell/early_pcie.c
+ create mode 100644 src/northbridge/intel/haswell/vcu_mailbox.c
+ create mode 100644 src/northbridge/intel/haswell/vcu_mailbox.h
+ create mode 100644 src/southbridge/intel/lynxpoint/early_pch_native.c
+
+diff --git a/src/northbridge/intel/haswell/Makefile.inc b/src/northbridge/intel/haswell/Makefile.inc
+index 329f1f7ffe..df0b097296 100644
+--- a/src/northbridge/intel/haswell/Makefile.inc
++++ b/src/northbridge/intel/haswell/Makefile.inc
+@@ -20,6 +20,7 @@ romstage-y += report_platform.c
+ postcar-y += memmap.c
+
+ ifeq ($(CONFIG_USE_NATIVE_RAMINIT),y)
++romstage-y += early_dmi.c early_pcie.c vcu_mailbox.c
+ subdirs-y += native_raminit
+
+ else
+diff --git a/src/northbridge/intel/haswell/early_dmi.c b/src/northbridge/intel/haswell/early_dmi.c
+new file mode 100644
+index 0000000000..9941242fd5
+--- /dev/null
++++ b/src/northbridge/intel/haswell/early_dmi.c
+@@ -0,0 +1,96 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <types.h>
++
++static void dmi_print_link_status(int loglevel)
++{
++ const uint16_t dmilsts = dmibar_read16(DMILSTS); /* sampled once so both printed fields come from the same read */
++ printk(loglevel, "DMI: Running at Gen%u x%u\n", dmilsts & 0xf, dmilsts >> 4 & 0x1f); /* bits 3:0 -> Gen number, bits 8:4 -> link width */
++}
++
++#define RETRAIN (1 << 5) /* DMILCTL bit set below to request a link retrain */
++
++#define LTRN (1 << 11) /* DMILSTS bit polled below until it reads back clear */
++
++static void dmi_setup_physical_layer(void)
++{
++ /* Program DMI AFE settings, which are needed for DMI to work */
++ peg_dmi_recipe(false, 0); /* is_peg = false selects the DMIBAR path; dev is not consulted in that case */
++
++ /* Additional DMI programming steps */
++ dmibar_setbits32(0x258, 1 << 29);
++ dmibar_clrsetbits32(0x208, 0x7ff, 0x6b5);
++ dmibar_clrsetbits32(0x22c, 0xffff, 0x2020);
++
++ /* Write SA reference code version */
++ dmibar_write32(0x71c, 0x0000000f);
++ dmibar_write32(0x720, 0x01060200);
++
++ /* We also have to bring up the PCH side of the DMI link */
++ pch_dmi_setup_physical_layer();
++
++ /* Write-once settings: clear the DMILCAP bits in mask 0x3f00f, then set value 2 in the low bits */
++ dmibar_clrsetbits32(DMILCAP, 0x3f00f, 2 << 0);
++
++ printk(BIOS_DEBUG, "Retraining DMI at Gen2 speeds...\n");
++ dmi_print_link_status(BIOS_DEBUG); /* status before the first retrain */
++
++ /* Retrain link: request it via DMILCTL, then spin (no timeout) until LTRN clears in DMILSTS */
++ dmibar_setbits16(DMILCTL, RETRAIN);
++ do {} while (dmibar_read16(DMILSTS) & LTRN);
++ dmi_print_link_status(BIOS_DEBUG);
++
++ /* Retrain link again for DMI Gen2 speeds; the final status is logged at BIOS_INFO */
++ dmibar_setbits16(DMILCTL, RETRAIN);
++ do {} while (dmibar_read16(DMILSTS) & LTRN);
++ dmi_print_link_status(BIOS_INFO);
++}
++
++#define VC_ACTIVE (1U << 31) /* OR'ed into every value built by DMI_VC_CFG */
++
++#define VCNEGPND (1 << 1) /* status bit polled until clear at the end of dmi_tc_vc_mapping() */
++
++#define DMI_VC_CFG(vcid, tcmap) (VC_ACTIVE | ((vcid) << 24) | (tcmap)) /* vcid is placed at bit 24 and up, tcmap in the low bits */
++
++static void dmi_tc_vc_mapping(void)
++{
++ printk(BIOS_DEBUG, "Programming SA DMI VC/TC mappings...\n");
++
++ if (CONFIG(INTEL_LYNXPOINT_LP)) /* NOTE(review): the caller visible in raminit_native.c only runs DMI init when this option is off */
++ dmibar_setbits8(0xa78, 1 << 1);
++
++ /* Each TC is mapped to one and only one VC: tcmap bits 0,3,4,5,6 -> VC0, bit 1 -> VC1, bit 2 -> VCp, bit 7 -> VCm */
++ const u32 vc0 = DMI_VC_CFG(0, (1 << 6) | (1 << 5) | (1 << 4) | (1 << 3) | (1 << 0));
++ const u32 vc1 = DMI_VC_CFG(1, (1 << 1));
++ const u32 vcp = DMI_VC_CFG(2, (1 << 2));
++ const u32 vcm = DMI_VC_CFG(7, (1 << 7));
++ dmibar_write32(DMIVC0RCTL, vc0);
++ dmibar_write32(DMIVC1RCTL, vc1);
++ dmibar_write32(DMIVCPRCTL, vcp);
++ dmibar_write32(DMIVCMRCTL, vcm);
++
++ /* Set Extended VC Count (EVCC) to 1 if VC1 is active (DMI_VC_CFG always sets VC_ACTIVE, so the field becomes 1) */
++ dmibar_clrsetbits8(DMIPVCCAP1, 7, !!(vc1 & VC_ACTIVE));
++
++ /*
++ * We also have to program the PCH side of the DMI link. Since both ends
++ * must use the same Virtual Channel settings, we pass them as arguments.
++ */
++ pch_dmi_tc_vc_mapping(vc0, vc1, vcp, vcm);
++
++ printk(BIOS_DEBUG, "Waiting for SA DMI VC negotiation... ");
++ do {} while (dmibar_read16(DMIVC0RSTS) & VCNEGPND); /* each of these four waits spins with no timeout */
++ do {} while (dmibar_read16(DMIVC1RSTS) & VCNEGPND);
++ do {} while (dmibar_read16(DMIVCPRSTS) & VCNEGPND);
++ do {} while (dmibar_read16(DMIVCMRSTS) & VCNEGPND);
++ printk(BIOS_DEBUG, "done!\n");
++}
++
++void dmi_early_init(void)
++{
++ dmi_setup_physical_layer(); /* AFE recipe, PCH-side setup, then two link retrains */
++ dmi_tc_vc_mapping(); /* program matching VC/TC maps on both ends and wait for negotiation */
++}
+diff --git a/src/northbridge/intel/haswell/early_pcie.c b/src/northbridge/intel/haswell/early_pcie.c
+new file mode 100644
+index 0000000000..d3940e3fac
+--- /dev/null
++++ b/src/northbridge/intel/haswell/early_pcie.c
+@@ -0,0 +1,121 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <device/pci_def.h>
++#include <device/pci_mmio_cfg.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/vcu_mailbox.h>
++#include <types.h>
++
++#define PEG_DEV(func) PCI_DEV(0, 1, func) /* presumably bus 0, device 1 - confirm against PCI_DEV's argument order */
++
++#define MAX_PEG_FUNC 3 /* PEG functions 0..2 are updated by peg_dmi_unset_and_set_mask_common() */
++
++static void peg_dmi_unset_and_set_mask_pcicfg(
++ volatile union pci_bank *const bank, /* register bank to modify (PEG config space or DMIBAR, chosen by the caller) */
++ const uint32_t offset, /* byte offset of the 32-bit register inside the bank */
++ const uint32_t unset_mask, /* field mask to clear, given unshifted */
++ const uint32_t set_mask, /* field value to set, given unshifted */
++ const uint32_t shift, /* bit position applied to both masks */
++ const bool valid) /* false turns the call into a no-op */
++{
++ if (!valid)
++ return;
++
++ volatile uint32_t *const addr = &bank->reg32[offset / sizeof(uint32_t)]; /* byte offset -> 32-bit register index */
++ clrsetbits32(addr, unset_mask << shift, set_mask << shift);
++}
++
++static void peg_dmi_unset_and_set_mask_common(
++ const bool is_peg, /* true: update every PEG function; false: update DMIBAR */
++ const uint32_t offset, /* register offset, used for both targets */
++ const uint32_t unset, /* field mask to clear, given unshifted */
++ const uint32_t set, /* field value to set, given unshifted */
++ const uint32_t shift, /* bit position applied to both masks */
++ const bool valid) /* NOTE(review): never checked here, unlike the pcicfg and vcu_mmio helpers; visible callers pass true */
++{
++ const uint32_t unset_mask = unset << shift;
++ const uint32_t set_mask = set << shift;
++ if (is_peg) {
++ for (uint8_t i = 0; i < MAX_PEG_FUNC; i++)
++ pci_update_config32(PEG_DEV(i), offset, ~unset_mask, set_mask); /* called with the inverted mask, i.e. the bits to keep */
++ } else {
++ dmibar_clrsetbits32(offset, unset_mask, set_mask);
++ }
++}
++
++static void peg_dmi_unset_and_set_mask_vcu_mmio(
++ const uint32_t addr, /* address handed to the VCU mailbox MMIO accessors */
++ const uint32_t unset_mask, /* field mask to clear, given unshifted */
++ const uint32_t set_mask, /* field value to set, given unshifted */
++ const uint32_t shift, /* bit position applied to both masks */
++ const bool valid) /* false turns the call into a no-op */
++{
++ if (!valid)
++ return;
++
++ vcu_update_mmio(addr, ~(unset_mask << shift), set_mask << shift); /* vcu_update_mmio() takes an AND mask, hence the inversion */
++}
++
++#define BUNDLE_STEP 0x20 /* byte stride added to the register offset on each loop iteration below */
++
++static void *const dmibar = (void *)(uintptr_t)CONFIG_FIXED_DMIBAR_MMIO_BASE; /* accessed below through the same pci_bank layout as a PEG device */
++
++void peg_dmi_recipe(const bool is_peg, const pci_devfn_t dev)
++{
++ const bool always = true; /* readability alias for the helpers' 'valid' argument */
++ const bool is_dmi = !is_peg;
++
++ /* Treat DMIBAR and PEG devices the same way */
++ volatile union pci_bank *const bank = is_peg ? pci_map_bus(dev) : dmibar; /* dev is only consulted when is_peg is true */
++
++ const size_t bundles = (is_peg ? 8 : 2) * BUNDLE_STEP; /* loop bound in bytes: 8 strides for PEG, 2 for DMI */
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP) {
++ /* These are actually per-lane */
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0xa00 + i, 0x1f, 0x0c, 0, always);
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0xa10 + i, 0x1f, 0x0c, 0, always);
++ }
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x904 + i, 0x1f, 0x02, 0, is_peg); /* is_peg as 'valid': skipped for DMI */
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x904 + i, 0x1f, 0x03, 5, is_peg);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x90c + i, 0x3f, 0x09, 5, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x90c + i, 0x0f, 0x05, 21, is_peg);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x910 + i, 0x0f, 0x08, 6, is_peg);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x910 + i, 0x0f, 0x00, 10, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x910 + i, 0x07, 0x00, 18, always);
++
++ peg_dmi_unset_and_set_mask_vcu_mmio(0x0c008001, 0x1f, 0x03, 25, is_peg); /* PEG only, through the VCU mailbox */
++ peg_dmi_unset_and_set_mask_vcu_mmio(0x0c0c8001, 0x3f, 0x00, 23, is_dmi); /* DMI only, through the VCU mailbox */
++
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0xc28, 0x1f, 0x13, 18, always);
++
++ peg_dmi_unset_and_set_mask_common(is_peg, 0xc38, 0x01, 0x00, 6, always); /* 'common' targets all PEG functions or DMIBAR, not 'bank' */
++ peg_dmi_unset_and_set_mask_common(is_peg, 0x260, 0x03, 0x02, 0, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x900 + i, 0x03, 0x00, 26, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x904 + i, 0x03, 0x03, 10, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x90c + i, 0x1f, 0x07, 25, is_peg);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x91c + i, 0x07, 0x05, 27, is_peg);
++}
+diff --git a/src/northbridge/intel/haswell/haswell.h b/src/northbridge/intel/haswell/haswell.h
+index 1b29f6baf0..30b4abd0a7 100644
+--- a/src/northbridge/intel/haswell/haswell.h
++++ b/src/northbridge/intel/haswell/haswell.h
+@@ -34,6 +34,9 @@ void haswell_early_initialization(void);
+ void haswell_late_initialization(void);
+ void haswell_unhide_peg(void);
+
++void dmi_early_init(void);
++void peg_dmi_recipe(const bool is_peg, const pci_devfn_t dev);
++
+ void report_platform_info(void);
+
+ struct acpi_rsdp;
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index 1aafdf8659..0938e026e3 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -1,7 +1,19 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
+ #include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
+ #include <northbridge/intel/haswell/raminit.h>
++#include <types.h>
++
++static bool early_init_native(int s3resume)
++{
++ printk(BIOS_DEBUG, "Starting native platform initialisation\n");
++
++ if (!CONFIG(INTEL_LYNXPOINT_LP))
++ dmi_early_init();
++
++ return false;
++}
+
+ void perform_raminit(const int s3resume)
+ {
+@@ -9,6 +21,9 @@ void perform_raminit(const int s3resume)
+ * See, this function's name is a lie. There are more things to
+ * do that memory initialisation, but they are relatively easy.
+ */
++ const bool cpu_replaced = early_init_native(s3resume);
++
++ (void)cpu_replaced;
+
+ /** TODO: Implement the required magic **/
+ die("NATIVE RAMINIT: More Magic (tm) required.\n");
+diff --git a/src/northbridge/intel/haswell/vcu_mailbox.c b/src/northbridge/intel/haswell/vcu_mailbox.c
+new file mode 100644
+index 0000000000..aead144023
+--- /dev/null
++++ b/src/northbridge/intel/haswell/vcu_mailbox.c
+@@ -0,0 +1,147 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/vcu_mailbox.h>
++#include <stdint.h>
++
++/*
++ * This is a library for the VCU (Validation Control Unit) mailbox. This
++ * mailbox is primarily used to adjust some magic PCIe tuning parameters.
++ *
++ * There are two revisions of the VCU mailbox. Rev1 is specific to Haswell
++ * stepping A0, and all other steppings use Rev2. Haswell stepping A0 CPUs
++ * are early Engineering Samples with undocumented errata, and most likely
++ * need special microcode updates to boot. Thus, the code does not support
++ * VCU mailbox Rev1, because no one should need it anymore.
++ */
++
++#define VCU_MAILBOX_INTERFACE 0x6c00
++#define VCU_MAILBOX_DATA 0x6c04
++
++#define VCU_RUN_BUSY (1 << 31)
++
++enum vcu_opcode { /* command codes written to VCU_MAILBOX_INTERFACE together with VCU_RUN_BUSY */
++ VCU_OPCODE_READ_VCU_API_VER_ID = 0x01, /* not issued anywhere in this file */
++ VCU_OPCODE_OPEN_SEQ = 0x02, /* first command of every sequence */
++ VCU_OPCODE_CLOSE_SEQ = 0x03, /* last command of every sequence */
++ VCU_OPCODE_READ_DATA = 0x07,
++ VCU_OPCODE_WRITE_DATA = 0x08,
++ VCU_OPCODE_READ_CSR = 0x13,
++ VCU_OPCODE_WRITE_CSR = 0x14,
++ VCU_OPCODE_READ_MMIO = 0x15,
++ VCU_OPCODE_WRITE_MMIO = 0x16,
++};
++
++enum vcu_sequence { /* sequence ids passed as the data word of OPEN_SEQ and CLOSE_SEQ */
++ SEQ_ID_READ_CSR = 0x1,
++ SEQ_ID_WRITE_CSR = 0x2,
++ SEQ_ID_READ_MMIO = 0x3,
++ SEQ_ID_WRITE_MMIO = 0x4,
++};
++
++#define VCU_RESPONSE_MASK 0xffff /* low 16 bits of the interface register hold the response code */
++#define VCU_RESPONSE_SUCCESS 0x40 /* the only response send_vcu_command() accepts */
++#define VCU_RESPONSE_BUSY 0x80 /* this and the codes below are not tested individually in this file */
++#define VCU_RESPONSE_THREAD_UNAVAILABLE 0x82
++#define VCU_RESPONSE_ILLEGAL 0x90
++
++/* Issue one VCU command: up to 10 attempts, each polling RUN_BUSY up to 100 times with udelay(10) between polls. FIXME: Use timer API */
++static void send_vcu_command(const enum vcu_opcode opcode, const uint32_t data)
++{
++ for (unsigned int i = 0; i < 10; i++) {
++ mchbar_write32(VCU_MAILBOX_DATA, data); /* data word is written before the command that consumes it */
++ mchbar_write32(VCU_MAILBOX_INTERFACE, opcode | VCU_RUN_BUSY);
++ uint32_t vcu_interface; /* always assigned: the poll loop below runs at least once */
++ for (unsigned int j = 0; j < 100; j++) {
++ vcu_interface = mchbar_read32(VCU_MAILBOX_INTERFACE);
++ if (!(vcu_interface & VCU_RUN_BUSY))
++ break;
++
++ udelay(10);
++ }
++ if (vcu_interface & VCU_RUN_BUSY) /* still busy after polling: resend the command */
++ continue;
++
++ if ((vcu_interface & VCU_RESPONSE_MASK) == VCU_RESPONSE_SUCCESS) /* any other response falls through to another attempt */
++ return;
++ }
++ printk(BIOS_ERR, "VCU: Failed to send command\n"); /* failure is only logged; the void return gives callers no status */
++}
++
++static enum vcu_opcode get_register_opcode(enum vcu_sequence seq) /* maps a sequence id to the opcode that carries the register address */
++{
++ switch (seq) {
++ case SEQ_ID_READ_CSR:
++ return VCU_OPCODE_READ_CSR;
++ case SEQ_ID_WRITE_CSR:
++ return VCU_OPCODE_WRITE_CSR;
++ case SEQ_ID_READ_MMIO:
++ return VCU_OPCODE_READ_MMIO;
++ case SEQ_ID_WRITE_MMIO:
++ return VCU_OPCODE_WRITE_MMIO;
++ default: /* all four enum vcu_sequence values are handled above */
++ return dead_code_t(enum vcu_opcode); /* dead_code_t() is defined elsewhere; presumably marks this path as unreachable */
++ }
++}
++
++static enum vcu_opcode get_data_opcode(enum vcu_sequence seq) /* maps a sequence id to the opcode used for the data phase */
++{
++ switch (seq) {
++ case SEQ_ID_READ_CSR: /* both read sequences share READ_DATA */
++ case SEQ_ID_READ_MMIO:
++ return VCU_OPCODE_READ_DATA;
++ case SEQ_ID_WRITE_CSR: /* both write sequences share WRITE_DATA */
++ case SEQ_ID_WRITE_MMIO:
++ return VCU_OPCODE_WRITE_DATA;
++ default: /* all four enum vcu_sequence values are handled above */
++ return dead_code_t(enum vcu_opcode); /* dead_code_t() is defined elsewhere; presumably marks this path as unreachable */
++ }
++}
++
++static uint32_t send_vcu_sequence(uint32_t addr, enum vcu_sequence seq, uint32_t wr_data) /* runs one open / address / data / close exchange */
++{
++ send_vcu_command(VCU_OPCODE_OPEN_SEQ, seq); /* the sequence id travels as the data word */
++
++ send_vcu_command(get_register_opcode(seq), addr); /* the target address travels as the data word */
++
++ send_vcu_command(get_data_opcode(seq), wr_data); /* read sequences pass 0 here (see the vcu_read_* wrappers) */
++
++ const uint32_t rd_data = mchbar_read32(VCU_MAILBOX_DATA); /* sampled after the data phase, before the sequence is closed */
++
++ send_vcu_command(VCU_OPCODE_CLOSE_SEQ, seq);
++
++ return rd_data; /* returned for writes as well; the vcu_write_* wrappers discard it */
++}
++
++uint32_t vcu_read_csr(uint32_t addr)
++{
++ return send_vcu_sequence(addr, SEQ_ID_READ_CSR, 0); /* 0 is a placeholder data word for the read */
++}
++
++void vcu_write_csr(uint32_t addr, uint32_t data)
++{
++ send_vcu_sequence(addr, SEQ_ID_WRITE_CSR, data); /* return value intentionally unused */
++}
++
++void vcu_update_csr(uint32_t addr, uint32_t andvalue, uint32_t orvalue)
++{
++ vcu_write_csr(addr, (vcu_read_csr(addr) & andvalue) | orvalue); /* read-modify-write: andvalue keeps bits, orvalue sets bits; two mailbox sequences */
++}
++
++uint32_t vcu_read_mmio(uint32_t addr)
++{
++ return send_vcu_sequence(addr, SEQ_ID_READ_MMIO, 0); /* 0 is a placeholder data word for the read */
++}
++
++void vcu_write_mmio(uint32_t addr, uint32_t data)
++{
++ send_vcu_sequence(addr, SEQ_ID_WRITE_MMIO, data); /* return value intentionally unused */
++}
++
++void vcu_update_mmio(uint32_t addr, uint32_t andvalue, uint32_t orvalue)
++{
++ vcu_write_mmio(addr, (vcu_read_mmio(addr) & andvalue) | orvalue); /* read-modify-write: andvalue keeps bits, orvalue sets bits; two mailbox sequences */
++}
+diff --git a/src/northbridge/intel/haswell/vcu_mailbox.h b/src/northbridge/intel/haswell/vcu_mailbox.h
+new file mode 100644
+index 0000000000..ba0a62e486
+--- /dev/null
++++ b/src/northbridge/intel/haswell/vcu_mailbox.h
+@@ -0,0 +1,16 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef HASWELL_VCU_MAILBOX_H
++#define HASWELL_VCU_MAILBOX_H
++
++#include <stdint.h>
++
++uint32_t vcu_read_csr(uint32_t addr);
++void vcu_write_csr(uint32_t addr, uint32_t data);
++void vcu_update_csr(uint32_t addr, uint32_t andvalue, uint32_t orvalue);
++
++uint32_t vcu_read_mmio(uint32_t addr);
++void vcu_write_mmio(uint32_t addr, uint32_t data);
++void vcu_update_mmio(uint32_t addr, uint32_t andvalue, uint32_t orvalue);
++
++#endif /* HASWELL_VCU_MAILBOX_H */
+diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc
+index 02022d348d..b8503ac8bc 100644
+--- a/src/southbridge/intel/lynxpoint/Makefile.inc
++++ b/src/southbridge/intel/lynxpoint/Makefile.inc
+@@ -37,6 +37,8 @@ bootblock-y += early_pch.c
+ romstage-y += early_usb.c early_me.c me_status.c early_pch.c
+ romstage-y += pmutil.c
+
++romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c
++
+ ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
+ romstage-y += lp_gpio.c
+ ramstage-y += lp_gpio.c
+diff --git a/src/southbridge/intel/lynxpoint/early_pch_native.c b/src/southbridge/intel/lynxpoint/early_pch_native.c
+new file mode 100644
+index 0000000000..c28ddfcf5d
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/early_pch_native.c
+@@ -0,0 +1,52 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <types.h>
++
++void pch_dmi_setup_physical_layer(void)
++{
++ /* FIXME: We need to make sure the SA supports Gen2 as well */
++ if ((RCBA32(0x21a4) & 0x0f) == 0x02) {
++ /* Set Gen 2 Common Clock N_FTS */
++ RCBA32_AND_OR(0x2340, ~0x00ff0000, 0x3a << 16);
++
++ /* Set Target Link Speed to DMI Gen2 */
++ RCBA8_AND_OR(DLCTL2, ~0x07, 0x02);
++ }
++}
++
++#define VC_ACTIVE (1U << 31)
++
++#define VCNEGPND (1 << 1)
++
++void pch_dmi_tc_vc_mapping(const u32 vc0, const u32 vc1, const u32 vcp, const u32 vcm)
++{
++ printk(BIOS_DEBUG, "Programming PCH DMI VC/TC mappings...\n");
++
++ RCBA32_AND_OR(CIR0050, ~(0xf << 20), 2 << 20);
++ if (vcp & VC_ACTIVE)
++ RCBA32_OR(CIR0050, 1 << 19 | 1 << 17);
++
++ RCBA32(CIR0050); /* Posted Write */
++
++ /* Use the same virtual channel mapping on both ends of the DMI link */
++ RCBA32(V0CTL) = vc0;
++ RCBA32(V1CTL) = vc1;
++ RCBA32(V1CTL); /* Posted Write */
++ RCBA32(VPCTL) = vcp;
++ RCBA32(VPCTL); /* Posted Write */
++ RCBA32(VMCTL) = vcm;
++
++ /* Lock the registers */
++ RCBA32_OR(CIR0050, 1U << 31);
++ RCBA32(CIR0050); /* Posted Write */
++
++ printk(BIOS_DEBUG, "Waiting for PCH DMI VC negotiation... ");
++ do {} while (RCBA16(V0STS) & VCNEGPND);
++ do {} while (RCBA16(V1STS) & VCNEGPND);
++ do {} while (RCBA16(VPSTS) & VCNEGPND);
++ do {} while (RCBA16(VMSTS) & VCNEGPND);
++ printk(BIOS_DEBUG, "done!\n");
++}
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index 7d9fc6d6af..b5e0c2a830 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -113,6 +113,9 @@ enum pch_platform_type {
+ PCH_TYPE_ULT = 5,
+ };
+
++void pch_dmi_setup_physical_layer(void);
++void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm);
++
+ void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+ void usb_ehci_disable(pci_devfn_t dev);
+ void usb_xhci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+@@ -406,9 +409,10 @@ void mainboard_config_rcba(void);
+
+ /* Southbridge IO BARs */
+
++#define PMBASE 0x40
+ #define GPIOBASE 0x48
+
+-#define PMBASE 0x40
++#define CIR0050 0x0050 /* 32bit */
+
+ #define RPC 0x0400 /* 32bit */
+ #define RPFN 0x0404 /* 32bit */
+@@ -431,6 +435,20 @@ void mainboard_config_rcba(void);
+ #define IOTR2 0x1e90 /* 64bit */
+ #define IOTR3 0x1e98 /* 64bit */
+
++#define V0CTL 0x2014 /* 32bit */
++#define V0STS 0x201a /* 16bit */
++
++#define V1CTL 0x2020 /* 32bit */
++#define V1STS 0x2026 /* 16bit */
++
++#define VPCTL 0x2030 /* 32bit */
++#define VPSTS 0x2038 /* 16bit */
++
++#define VMCTL 0x2040 /* 32bit */
++#define VMSTS 0x2048 /* 16bit */
++
++#define DLCTL2 0x21b0
++
+ #define TCTL 0x3000 /* 8bit */
+
+ #define NOINT 0
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch b/resources/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch
new file mode 100644
index 00000000..28dbc02a
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch
@@ -0,0 +1,148 @@
+From 98142e01fc8ebb3b762974e9e4de75e7f5c073b4 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 22:18:21 +0200
+Subject: [PATCH 04/26] haswell/lynxpoint: Add native early ME init
+
+Implement native early ME init for Lynx Point. This is only needed when
+MRC.bin is not used.
+
+Change-Id: If416e2078f139f26b4742c564b70e018725bf003
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/raminit_native.c | 17 ++++++++++-
+ src/southbridge/intel/lynxpoint/early_me.c | 30 ++++++++++++++++++-
+ src/southbridge/intel/lynxpoint/me.h | 7 +++--
+ 3 files changed, 50 insertions(+), 4 deletions(-)
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index 0938e026e3..6a002548c1 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -1,18 +1,24 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
+ #include <console/console.h>
++#include <delay.h>
+ #include <northbridge/intel/haswell/haswell.h>
+ #include <northbridge/intel/haswell/raminit.h>
++#include <southbridge/intel/lynxpoint/me.h>
+ #include <types.h>
+
+ static bool early_init_native(int s3resume)
+ {
+ printk(BIOS_DEBUG, "Starting native platform initialisation\n");
+
++ intel_early_me_init();
++ /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
++ const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
++
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+ dmi_early_init();
+
+- return false;
++ return cpu_replaced;
+ }
+
+ void perform_raminit(const int s3resume)
+@@ -25,6 +31,15 @@ void perform_raminit(const int s3resume)
+
+ (void)cpu_replaced;
+
++ /** TODO: Move after raminit */
++ if (intel_early_me_uma_size() > 0) {
++ /** TODO: Update status once raminit is implemented **/
++ uint8_t me_status = ME_INIT_STATUS_ERROR;
++ intel_early_me_init_done(me_status);
++ }
++
++ intel_early_me_status();
++
+ /** TODO: Implement the required magic **/
+ die("NATIVE RAMINIT: More Magic (tm) required.\n");
+ }
+diff --git a/src/southbridge/intel/lynxpoint/early_me.c b/src/southbridge/intel/lynxpoint/early_me.c
+index 947c570e16..07013c5539 100644
+--- a/src/southbridge/intel/lynxpoint/early_me.c
++++ b/src/southbridge/intel/lynxpoint/early_me.c
+@@ -1,11 +1,12 @@
+ /* SPDX-License-Identifier: GPL-2.0-only */
+
+ #include <arch/io.h>
++#include <cf9_reset.h>
+ #include <device/pci_ops.h>
+ #include <console/console.h>
+ #include <delay.h>
+ #include <halt.h>
+-
++#include <timer.h>
+ #include "me.h"
+ #include "pch.h"
+
+@@ -60,6 +61,33 @@ int intel_early_me_init(void)
+ return 0;
+ }
+
++bool intel_early_me_cpu_replacement_check(void) /* Poll ME HFS2 for the CPU-replaced status */
++{
++ printk(BIOS_DEBUG, "ME: Checking whether CPU was replaced... ");
++
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 50); /* give up polling after 50 ms */
++
++ union me_hfs2 hfs2;
++ do {
++ hfs2.raw = pci_read_config32(PCH_ME_DEV, PCI_ME_HFS2);
++ if (stopwatch_expired(&timer)) { /* checked before the valid bit, so a late valid read is discarded */
++ /* Assume CPU was replaced just in case */
++ printk(BIOS_DEBUG, "timed out, assuming CPU was replaced\n");
++ return true;
++ }
++ udelay(ME_DELAY);
++ } while (!hfs2.cpu_replaced_valid); /* loop until the ME flags cpu_replaced_sts as valid */
++
++ if (hfs2.warm_reset_request) {
++ printk(BIOS_DEBUG, "warm reset needed for dynamic fusing\n");
++ system_reset(); /* NOTE(review): presumably does not return - confirm */
++ }
++
++ printk(BIOS_DEBUG, "%sreplaced\n", hfs2.cpu_replaced_sts ? "" : "not ");
++ return hfs2.cpu_replaced_sts; /* true when the ME reports the CPU as replaced */
++}
++
+ int intel_early_me_uma_size(void)
+ {
+ union me_uma uma = { .raw = pci_read_config32(PCH_ME_DEV, PCI_ME_UMA) };
+diff --git a/src/southbridge/intel/lynxpoint/me.h b/src/southbridge/intel/lynxpoint/me.h
+index fe8b0260c4..6990322651 100644
+--- a/src/southbridge/intel/lynxpoint/me.h
++++ b/src/southbridge/intel/lynxpoint/me.h
+@@ -177,14 +177,16 @@ union me_did {
+ union me_hfs2 {
+ struct __packed {
+ u32 bist_in_progress: 1;
+- u32 reserved1: 2;
++ u32 icc_prog_sts: 2;
+ u32 invoke_mebx: 1;
+ u32 cpu_replaced_sts: 1;
+ u32 mbp_rdy: 1;
+ u32 mfs_failure: 1;
+ u32 warm_reset_request: 1;
+ u32 cpu_replaced_valid: 1;
+- u32 reserved2: 4;
++ u32 reserved: 2;
++ u32 fw_upd_ipu: 1;
++ u32 reserved2: 1;
+ u32 mbp_cleared: 1;
+ u32 reserved3: 2;
+ u32 current_state: 8;
+@@ -338,6 +340,7 @@ void intel_me_status(union me_hfs hfs, union me_hfs2 hfs2);
+
+ void intel_early_me_status(void);
+ int intel_early_me_init(void);
++bool intel_early_me_cpu_replacement_check(void);
+ int intel_early_me_uma_size(void);
+ int intel_early_me_init_done(u8 status);
+
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch b/resources/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch
new file mode 100644
index 00000000..d9c2570b
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch
@@ -0,0 +1,783 @@
+From 9bfb8614dbf1d9800ef8251cb3d839bcdbe5577f Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 23:17:39 +0200
+Subject: [PATCH 05/26] sb/intel/lynxpoint: Add native USB init
+
+Implement native USB initialisation for Lynx Point. This is only needed
+when MRC.bin is not used.
+
+TO DO: Figure out how to deal with the FIXME's and TODO's lying around.
+
+Change-Id: Ie0fbeeca7b1ca1557173772d733fd2fa27703373
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/raminit_native.c | 3 +
+ src/southbridge/intel/lynxpoint/Makefile.inc | 2 +-
+ src/southbridge/intel/lynxpoint/early_usb.c | 11 -
+ .../intel/lynxpoint/early_usb_native.c | 584 ++++++++++++++++++
+ src/southbridge/intel/lynxpoint/pch.h | 49 ++
+ 5 files changed, 637 insertions(+), 12 deletions(-)
+ create mode 100644 src/southbridge/intel/lynxpoint/early_usb_native.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index 6a002548c1..ef61d4ee09 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -5,6 +5,7 @@
+ #include <northbridge/intel/haswell/haswell.h>
+ #include <northbridge/intel/haswell/raminit.h>
+ #include <southbridge/intel/lynxpoint/me.h>
++#include <southbridge/intel/lynxpoint/pch.h>
+ #include <types.h>
+
+ static bool early_init_native(int s3resume)
+@@ -15,6 +16,8 @@ static bool early_init_native(int s3resume)
+ /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
+ const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
+
++ early_usb_init();
++
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+ dmi_early_init();
+
+diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc
+index b8503ac8bc..0e1f2fe4eb 100644
+--- a/src/southbridge/intel/lynxpoint/Makefile.inc
++++ b/src/southbridge/intel/lynxpoint/Makefile.inc
+@@ -37,7 +37,7 @@ bootblock-y += early_pch.c
+ romstage-y += early_usb.c early_me.c me_status.c early_pch.c
+ romstage-y += pmutil.c
+
+-romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c
++romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c
+
+ ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
+ romstage-y += lp_gpio.c
+diff --git a/src/southbridge/intel/lynxpoint/early_usb.c b/src/southbridge/intel/lynxpoint/early_usb.c
+index a753681ce0..52e8ac17f8 100644
+--- a/src/southbridge/intel/lynxpoint/early_usb.c
++++ b/src/southbridge/intel/lynxpoint/early_usb.c
+@@ -4,17 +4,6 @@
+ #include <device/pci_def.h>
+ #include "pch.h"
+
+-/* HCD_INDEX == 2 selects 0:1a.0 (PCH_EHCI2), any other index
+- * selects 0:1d.0 (PCH_EHCI1) for usbdebug use.
+- */
+-#if CONFIG_USBDEBUG_HCD_INDEX != 2
+-#define PCH_EHCI1_TEMP_BAR0 CONFIG_EHCI_BAR
+-#define PCH_EHCI2_TEMP_BAR0 (PCH_EHCI1_TEMP_BAR0 + 0x400)
+-#else
+-#define PCH_EHCI2_TEMP_BAR0 CONFIG_EHCI_BAR
+-#define PCH_EHCI1_TEMP_BAR0 (PCH_EHCI2_TEMP_BAR0 + 0x400)
+-#endif
+-
+ /*
+ * Setup USB controller MMIO BAR to prevent the
+ * reference code from resetting the controller.
+diff --git a/src/southbridge/intel/lynxpoint/early_usb_native.c b/src/southbridge/intel/lynxpoint/early_usb_native.c
+new file mode 100644
+index 0000000000..cb6f6ee8e6
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/early_usb_native.c
+@@ -0,0 +1,584 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <device/mmio.h>
++#include <device/pci_def.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/raminit.h>
++#include <southbridge/intel/lynxpoint/iobp.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <timer.h>
++#include <types.h>
++
++static unsigned int is_usbr_enabled(void) /* 1 if bit 5 of xHCI USB3FUS is set, else 0 */
++{
++ return !!(pci_read_config32(PCH_XHCI_DEV, XHCI_USB3FUS) & BIT(5)); /* !! normalises to 0/1 so callers can add it to port counts */
++}
++
++static char *const xhci_bar = (char *)PCH_XHCI_TEMP_BAR0; /* byte pointer to the temporary xHCI MMIO window; register offsets are added to it */
++
++static void ehci_hcs_init(const pci_devfn_t dev, const uintptr_t ehci_bar) /* Early init of one EHCI controller */
++{
++ pci_write_config32(dev, PCI_BASE_ADDRESS_0, ehci_bar); /* assign the caller-supplied temporary BAR */
++
++ /** FIXME: Determine whether Bus Master is required (or clean it up afterwards) **/
++ pci_or_config16(dev, PCI_COMMAND, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY);
++
++ char *const mem_bar = (char *)ehci_bar; /* byte pointer for the MMIO accesses below */
++
++ /**
++ * Shared EHCI/XHCI ports w/a.
++ * This step is required when some of the ports are routed to EHCI
++ * and other ports are routed to XHCI at the same time.
++ *
++ * FIXME: Under which conditions should this be done?
++ */
++ pci_and_config16(dev, 0x78, ~0x03); /* clears bits 1:0 of config offset 0x78 */
++
++ /* Skip reset if usbdebug is enabled */
++ if (!CONFIG(USBDEBUG_IN_PRE_RAM))
++ setbits32(mem_bar + EHCI_USB_CMD, EHCI_USB_CMD_HCRESET);
++
++ /* 2: Configure number of controllers and ports */
++ pci_or_config16(dev, EHCI_ACCESS_CNTL, ACCESS_CNTL_ENABLE); /* NOTE(review): presumably makes HCSPARAMS writable - confirm */
++ clrsetbits32(mem_bar + EHCI_HCS_PARAMS, 0xf << 12, 0); /* zero bits 15:12 */
++ clrsetbits32(mem_bar + EHCI_HCS_PARAMS, 0xf << 0, 2 + is_usbr_enabled()); /* bits 3:0 = 2, or 3 with USBr */
++ pci_and_config16(dev, EHCI_ACCESS_CNTL, ~ACCESS_CNTL_ENABLE); /* drop the access enable again */
++
++ pci_or_config16(dev, 0x78, BIT(2));
++ pci_or_config16(dev, 0x7c, BIT(14) | BIT(7));
++ pci_update_config32(dev, 0x8c, ~(0xf << 8), (4 << 8)); /* bits 11:8 = 4 */
++ pci_update_config32(dev, 0x8c, ~BIT(26), BIT(17)); /* clear bit 26, set bit 17 */
++}
++
++static inline unsigned int physical_port_count(void) /* Number of USB2 ports to iterate over */
++{
++ return MAX_USB2_PORTS; /* fixed upper bound, not read from fuses (unlike hs_port_count()) */
++}
++
++static unsigned int hs_port_count(void) /* Number of high-speed (USB2) ports on this PCH */
++{
++ /** TODO: Apparently, WPT-LP has 10 USB2 ports **/
++ if (CONFIG(INTEL_LYNXPOINT_LP))
++ return 8; /* LP variant: fixed value, fuses are not consulted */
++
++ switch ((pci_read_config32(PCH_XHCI_DEV, XHCI_USB3FUS) >> 1) & 3) { /* decode USB3FUS bits 2:1 */
++ case 3:
++ return 8;
++ case 2:
++ return 10;
++ case 1:
++ return 12;
++ case 0:
++ default:
++ return 14;
++ }
++}
++
++static unsigned int ss_port_count(void) /* Number of SuperSpeed (USB3) ports on this PCH */
++{
++ if (CONFIG(INTEL_LYNXPOINT_LP))
++ return 4; /* LP variant: fixed value, fuses are not consulted */
++
++ switch ((pci_read_config32(PCH_XHCI_DEV, XHCI_USB3FUS) >> 3) & 3) { /* decode USB3FUS bits 4:3 */
++ case 3:
++ return 0;
++ case 2:
++ return 2;
++ case 1:
++ return 4;
++ case 0:
++ default:
++ return 6;
++ }
++}
++
++static void common_ehci_hcs_init(void) /* Init the EHCI controller(s), then apply shared IOBP settings */
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);
++
++ ehci_hcs_init(PCH_EHCI1_DEV, PCH_EHCI1_TEMP_BAR0);
++ if (!is_lp) /* the second EHCI controller is only initialised on non-LP parts */
++ ehci_hcs_init(PCH_EHCI2_DEV, PCH_EHCI2_TEMP_BAR0);
++
++ pch_iobp_update(0xe5007f04, 0, 0x00004481); /* AND mask 0: the whole register becomes 0x00004481 */
++
++ for (unsigned int port = 0; port < physical_port_count(); port++)
++ pch_iobp_update(0xe500400f + port * 0x100, ~(1 << 0), 0 << 0); /* clear bit 0 in each per-port register (stride 0x100) */
++
++ pch_iobp_update(0xe5007f14, ~(3 << 19), (3 << 19)); /* set bits 20:19 */
++
++ if (is_lp)
++ pch_iobp_update(0xe5007f02, ~(3 << 22), (0 << 22)); /* clear bits 23:22 (LP only) */
++}
++
++static void xhci_open_memory_space(void) /* Make the xHCI MMIO registers reachable through xhci_bar */
++{
++ /** FIXME: Determine whether Bus Master is required (or clean it up afterwards) **/
++ pci_write_config32(PCH_XHCI_DEV, PCI_BASE_ADDRESS_0, (uintptr_t)xhci_bar); /* program BAR0 with the temporary address */
++ pci_or_config16(PCH_XHCI_DEV, PCI_COMMAND, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY); /* enable memory decode and bus mastering */
++}
++
++static void xhci_close_memory_space(void) /* Reverse of xhci_open_memory_space() */
++{
++ pci_and_config16(PCH_XHCI_DEV, PCI_COMMAND, ~(PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY)); /* stop decoding before touching the BAR */
++ pci_write_config32(PCH_XHCI_DEV, PCI_BASE_ADDRESS_0, 0); /* clear the temporary BAR */
++}
++
++static void common_xhci_hc_init(void) /* Program xHCI MMIO/config registers; requires xhci_open_memory_space() first */
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP); /* selects between the LP and non-LP register values below */
++
++ if (!is_lp) {
++ const unsigned int max_ports = 15 + ss_port_count();
++ clrsetbits32(xhci_bar + XHCI_HCS_PARAMS_1, 0xf << 28, max_ports << 28); /* NOTE(review): xHCI spec puts MaxPorts in bits 31:24; confirm the 28-bit shift and 4-bit mask */
++ }
++
++ clrsetbits32(xhci_bar + XHCI_HCS_PARAMS_3, 0xffff << 16 | 0xff, 0x200 << 16 | 0x0a); /* bits 31:16 = 0x200, bits 7:0 = 0x0a */
++ clrsetbits32(xhci_bar + XHCI_HCC_PARAMS, BIT(5), BIT(10) | BIT(9));
++
++ if (!is_lp)
++ clrsetbits32(xhci_bar + 0x8008, BIT(19), 0);
++
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x8058, BIT(8), BIT(16));
++ else
++ clrsetbits32(xhci_bar + 0x8058, BIT(8), BIT(16) | BIT(20));
++
++ clrsetbits32(xhci_bar + 0x8060, 0, BIT(25) | BIT(18)); /* clear mask 0: pure bit-set */
++ clrsetbits32(xhci_bar + 0x8090, 0, BIT(14) | BIT(8));
++ clrsetbits32(xhci_bar + 0x8094, 0, BIT(23) | BIT(21) | BIT(14));
++ clrsetbits32(xhci_bar + 0x80e0, BIT(16), BIT(6));
++ clrsetbits32(xhci_bar + 0x80ec, (7 << 12) | (7 << 9), (0 << 12) | (6 << 9)); /* bits 14:12 = 0, bits 11:9 = 6 */
++ clrsetbits32(xhci_bar + 0x80f0, BIT(20), 0);
++
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x80fc, 0, BIT(25));
++
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x8110, BIT(8) | BIT(2), BIT(20) | BIT(11));
++ else
++ clrsetbits32(xhci_bar + 0x8110, BIT(2), BIT(20) | BIT(11));
++
++ if (is_lp)
++ write32(xhci_bar + 0x8140, 0xff00f03c);
++ else
++ write32(xhci_bar + 0x8140, 0xff03c132);
++
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x8154, BIT(21), BIT(13));
++ else
++ clrsetbits32(xhci_bar + 0x8154, BIT(21) | BIT(13), 0);
++
++ clrsetbits32(xhci_bar + 0x8154, BIT(3), 0); /* second access to 0x8154: bit 3 is cleared on both variants */
++
++ if (is_lp) {
++ clrsetbits32(xhci_bar + 0x8164, 0, BIT(1) | BIT(0));
++ write32(xhci_bar + 0x8174, 0x01400c0a);
++ write32(xhci_bar + 0x817c, 0x033200a3);
++ write32(xhci_bar + 0x8180, 0x00cb0028);
++ write32(xhci_bar + 0x8184, 0x0064001e);
++ }
++
++ /*
++ * Note: Register at offset 0x44 is 32-bit, but bit 31 is write-once.
++ * We use these weird partial accesses here to avoid locking bit 31.
++ */
++ pci_or_config16(PCH_XHCI_DEV, 0x44, BIT(15) | BIT(14) | BIT(10) | BIT(0)); /* low 16 bits only */
++ pci_or_config8(PCH_XHCI_DEV, 0x44 + 2, 0x0f); /* byte 2, i.e. bits 19:16 of the 32-bit register */
++
++ /* LPT-LP >= B0 */
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x8188, 0, BIT(26) | BIT(24)); /* the stepping is not checked here */
++
++ /* LPT-H >= C0 */
++ if (!is_lp)
++ clrsetbits32(xhci_bar + 0x8188, 0, BIT(24)); /* the stepping is not checked here */
++}
++
++static inline bool is_mem_sr(void) /* True if GEN_PMCON_2_MEM_SR is set in the LPC device's GEN_PMCON_2 */
++{
++ return pci_read_config16(PCH_LPC_DEV, GEN_PMCON_2) & GEN_PMCON_2_MEM_SR; /* non-zero masked value converts to true */
++}
++
++static bool should_restore_xhci_smart_auto(void) /* True only if is_mem_sr() and PMIR_XHCI_SMART_AUTO are both set */
++{
++ if (!is_mem_sr()) /* without MEM_SR the PMIR bit is not consulted at all */
++ return false;
++
++ return pci_read_config32(PCH_LPC_DEV, PMIR) & PMIR_XHCI_SMART_AUTO; /* non-zero masked value converts to true */
++}
++
++enum usb_port_route { /* Routing target selected by early_usb_init() for perform_xhci_ehci_switching_flow() */
++ ROUTE_TO_EHCI, /* reset the ports and clear the USB2PR/USB3PR routing bits */
++ ROUTE_TO_XHCI, /* copy the USB2PRM/USB3PRM masks into USB2PR/USB3PR */
++};
++
++/* Returns whether port reset was successful (false if any PR bit is still set after 100 ms) */
++static bool reset_usb2_ports(const unsigned int ehci_ports)
++{
++ for (unsigned int port = 0; port < ehci_ports; port++) {
++ /* Initiate port reset for all USB2 ports */
++ clrsetbits32(
++ xhci_bar + XHCI_USB2_PORTSC(port),
++ XHCI_USB2_PORTSC_PED,
++ XHCI_USB2_PORTSC_PR);
++ }
++ /* Poll for port reset bit to be cleared or time out at 100ms */
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 100);
++ uint32_t reg32;
++ do {
++ reg32 = 0; /* accumulates the PORTSC value of every port for this pass */
++ for (unsigned int port = 0; port < ehci_ports; port++)
++ reg32 |= read32(xhci_bar + XHCI_USB2_PORTSC(port));
++
++ reg32 &= XHCI_USB2_PORTSC_PR; /* non-zero while at least one port is still resetting */
++ if (!reg32) {
++ const long elapsed_time = stopwatch_duration_usecs(&timer);
++ printk(BIOS_DEBUG, "%s: took %ld usecs\n", __func__, elapsed_time); /* %ld matches the signed long argument */
++ return true;
++ }
++ /* Reference code has a 10 ms delay here, but a smaller delay works too */
++ udelay(100);
++ } while (!stopwatch_expired(&timer));
++ printk(BIOS_ERR, "%s: timed out\n", __func__);
++ return !reg32; /* reg32 is non-zero on this path, so this reports failure */
++}
++
++/* Returns whether warm reset was successful (false if any PR bit is still set after 100 ms) */
++static bool warm_reset_usb3_ports(const unsigned int xhci_ports)
++{
++ for (unsigned int port = 0; port < xhci_ports; port++) {
++ /* Initiate warm reset for all USB3 ports */
++ clrsetbits32(
++ xhci_bar + XHCI_USB3_PORTSC(port),
++ XHCI_USB3_PORTSC_PED,
++ XHCI_USB3_PORTSC_WPR);
++ }
++ /* Poll for port reset bit to be cleared or time out at 100ms */
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 100);
++ uint32_t reg32;
++ do {
++ reg32 = 0; /* accumulates the PORTSC value of every port for this pass */
++ for (unsigned int port = 0; port < xhci_ports; port++)
++ reg32 |= read32(xhci_bar + XHCI_USB3_PORTSC(port));
++
++ reg32 &= XHCI_USB3_PORTSC_PR; /* completion is observed through PR, although WPR was written */
++ if (!reg32) {
++ const long elapsed_time = stopwatch_duration_usecs(&timer);
++ printk(BIOS_DEBUG, "%s: took %ld usecs\n", __func__, elapsed_time); /* %ld matches the signed long argument */
++ return true;
++ }
++ /* Reference code has a 10 ms delay here, but a smaller delay works too */
++ udelay(100);
++ } while (!stopwatch_expired(&timer));
++ printk(BIOS_ERR, "%s: timed out\n", __func__);
++ return !reg32; /* reg32 is non-zero on this path, so this reports failure */
++}
++
++static void perform_xhci_ehci_switching_flow(const enum usb_port_route usb_route) /* Program port routing masks, then route ports to EHCI or xHCI */
++{
++ const pci_devfn_t dev = PCH_XHCI_DEV;
++
++ const unsigned int ehci_ports = hs_port_count() + is_usbr_enabled(); /* USB2 ports, plus one when USBr is enabled */
++ const unsigned int xhci_ports = ss_port_count();
++
++ const uint32_t ehci_mask = BIT(ehci_ports) - 1; /* one bit per USB2 port */
++ const uint32_t xhci_mask = BIT(xhci_ports) - 1; /* one bit per USB3 port */
++
++ /** TODO: Handle USBr port? How, though? **/
++ pci_update_config32(dev, XHCI_USB2PRM, ~XHCI_USB2PR_HCSEL, ehci_mask);
++ pci_update_config32(dev, XHCI_USB3PRM, ~XHCI_USB3PR_SSEN, xhci_mask);
++
++ /*
++ * Workaround for USB2PR / USB3PR value not surviving warm reset.
++ * Restore USB Port Routing registers if OS HC Switch driver has been executed.
++ */
++ if (should_restore_xhci_smart_auto()) {
++ /** FIXME: Derive values from mainboard code instead? **/
++ pci_update_config32(dev, XHCI_USB2PR, ~XHCI_USB2PR_HCSEL, ehci_mask);
++ pci_update_config32(dev, XHCI_USB3PR, ~XHCI_USB3PR_SSEN, xhci_mask);
++ }
++
++ /* Later stages shouldn't need the value of this bit */
++ pci_and_config32(PCH_LPC_DEV, PMIR, ~PMIR_XHCI_SMART_AUTO);
++
++ /**
++ * FIXME: Things here depend on the chosen routing mode.
++ * For now, implement both functions.
++ */
++
++ /* Route to EHCI if xHCI disabled or auto mode */
++ if (usb_route == ROUTE_TO_EHCI) {
++ if (!reset_usb2_ports(ehci_ports))
++ printk(BIOS_ERR, "USB2 port reset timed out\n");
++
++ pci_and_config32(dev, XHCI_USB2PR, ~XHCI_USB2PR_HCSEL); /* hand all USB2 ports to EHCI */
++
++ for (unsigned int port = 0; port < ehci_ports; port++) {
++ clrsetbits32(
++ xhci_bar + XHCI_USB2_PORTSC(port),
++ XHCI_USB2_PORTSC_PED,
++ XHCI_USB2_PORTSC_CHST);
++ }
++
++ if (!warm_reset_usb3_ports(xhci_ports))
++ printk(BIOS_ERR, "USB3 warm reset timed out\n");
++
++ /* FIXME: BWG says this should be inside the warm reset function */
++ pci_and_config32(dev, XHCI_USB3PR, ~XHCI_USB3PR_SSEN); /* disable SuperSpeed on all USB3 ports */
++
++ for (unsigned int port = 0; port < xhci_ports; port++) { /* bound by the USB3 port count: these are USB3 PORTSC registers */
++ clrsetbits32(
++ xhci_bar + XHCI_USB3_PORTSC(port),
++ XHCI_USB3_PORTSC_PED,
++ XHCI_USB3_PORTSC_CHST);
++ }
++
++ setbits32(xhci_bar + XHCI_USBCMD, BIT(0)); /* pulse USBCMD bit 0: set ... */
++ clrbits32(xhci_bar + XHCI_USBCMD, BIT(0)); /* ... then clear it again */
++ }
++
++ /* Route to xHCI if xHCI enabled */
++ if (usb_route == ROUTE_TO_XHCI) {
++ if (is_mem_sr()) { /* warm reset is only issued when GEN_PMCON_2_MEM_SR is set */
++ if (!warm_reset_usb3_ports(xhci_ports))
++ printk(BIOS_ERR, "USB3 warm reset timed out\n");
++ }
++
++ const uint32_t xhci_port_mask = pci_read_config32(dev, XHCI_USB3PRM) & 0x3f; /* read back the mask written above */
++ pci_update_config32(dev, XHCI_USB3PR, ~XHCI_USB3PR_SSEN, xhci_port_mask);
++
++ const uint32_t ehci_port_mask = pci_read_config32(dev, XHCI_USB2PRM) & 0x7fff; /* read back the mask written above */
++ pci_update_config32(dev, XHCI_USB2PR, ~XHCI_USB2PR_HCSEL, ehci_port_mask);
++ }
++}
++
++/* Bit index for (port, oc): 8 bits per OC pin. Do not shift in this macro, as it can cause undefined behaviour for bad port/oc values */
++#define PORT_TO_OC_SHIFT(port, oc) ((oc) * 8 + (port))
++
++/* Avoid shifting into undefined behaviour: true if `shift` is a valid bit index of a 32-bit value */
++static inline bool shift_ok(const int shift)
++{
++ return shift >= 0 && shift < 32; /* NOTE(review): 31 is accepted, which is only well-defined for an unsigned shifted operand */
++}
++
++static void usb_overcurrent_mapping(void) /* Build the OC pin maps from mainboard port tables and write them to EHCI/xHCI */
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);
++
++ uint32_t ehci_1_ocmap = 0; /* USB2 ports 0..7, OC pins 0..3 */
++ uint32_t ehci_2_ocmap = 0; /* USB2 ports 8..13, OC pins 4..7 */
++ uint32_t xhci_1_ocmap = 0; /* USB3 ports, OC pins 0..3 */
++ uint32_t xhci_2_ocmap = 0; /* USB3 ports, OC pins 4..7 */
++
++ /*
++ * EHCI
++ */
++ for (unsigned int idx = 0; idx < physical_port_count(); idx++) {
++ const struct usb2_port_config *const port = &mainboard_usb2_ports[idx];
++ printk(BIOS_DEBUG, "USB2 port %u => ", idx);
++ if (!port->enable) {
++ printk(BIOS_DEBUG, "disabled\n");
++ continue;
++ }
++ const unsigned short oc_pin = port->oc_pin;
++ if (oc_pin == USB_OC_PIN_SKIP) {
++ printk(BIOS_DEBUG, "not mapped to OC pin\n");
++ continue;
++ }
++ /* Ports 0 .. 7 => OC 0 .. 3 */
++ if (idx < 8 && oc_pin <= 3) {
++ const int shift = PORT_TO_OC_SHIFT(idx, oc_pin);
++ if (shift_ok(shift)) {
++ printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin);
++ ehci_1_ocmap |= 1U << shift; /* unsigned: shift is 31 for port 7 on OC3 */
++ continue;
++ }
++ }
++ /* Ports 8 .. 13 => OC 4 .. 7 (LPT-H only) */
++ if (!is_lp && idx >= 8 && oc_pin >= 4) {
++ const int shift = PORT_TO_OC_SHIFT(idx - 8, oc_pin - 4); /* port-relative: port 8 is bit 0 of each OC byte (per PCH datasheet OCMAP layout) */
++ if (shift_ok(shift)) {
++ printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin);
++ ehci_2_ocmap |= 1U << shift;
++ continue;
++ }
++ }
++ printk(BIOS_ERR, "Invalid OC pin %u for USB2 port %u\n", oc_pin, idx); /* reached when neither range above accepted the pair */
++ }
++ printk(BIOS_DEBUG, "\n");
++ pci_write_config32(PCH_EHCI1_DEV, EHCI_OCMAP, ehci_1_ocmap);
++ if (!is_lp)
++ pci_write_config32(PCH_EHCI2_DEV, EHCI_OCMAP, ehci_2_ocmap);
++
++ /*
++ * xHCI
++ */
++ for (unsigned int idx = 0; idx < ss_port_count(); idx++) {
++ const struct usb3_port_config *const port = &mainboard_usb3_ports[idx];
++ printk(BIOS_DEBUG, "USB3 port %u => ", idx);
++ if (!port->enable) {
++ printk(BIOS_DEBUG, "disabled\n");
++ continue;
++ }
++ const unsigned short oc_pin = port->oc_pin;
++ if (oc_pin == USB_OC_PIN_SKIP) {
++ printk(BIOS_DEBUG, "not mapped to OC pin\n");
++ continue;
++ }
++ /* Ports 0 .. 5 => OC 0 .. 3 */
++ if (oc_pin <= 3) {
++ const int shift = PORT_TO_OC_SHIFT(idx, oc_pin);
++ if (shift_ok(shift)) {
++ printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin);
++ xhci_1_ocmap |= 1U << shift;
++ continue;
++ }
++ }
++ /* Ports 0 .. 5 => OC 4 .. 7 (LPT-H only) */
++ if (!is_lp && oc_pin >= 4) {
++ const int shift = PORT_TO_OC_SHIFT(idx, oc_pin - 4);
++ if (shift_ok(shift)) {
++ printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin);
++ xhci_2_ocmap |= 1U << shift;
++ continue;
++ }
++ }
++ printk(BIOS_ERR, "Invalid OC pin %u for USB3 port %u\n", oc_pin, idx); /* reached when neither range above accepted the pair */
++ }
++ printk(BIOS_DEBUG, "\n");
++ pci_write_config32(PCH_XHCI_DEV, XHCI_U2OCM1, ehci_1_ocmap); /* xHCI receives the same USB2 maps as the EHCI controllers */
++ pci_write_config32(PCH_XHCI_DEV, XHCI_U3OCM1, xhci_1_ocmap);
++ if (!is_lp) {
++ pci_write_config32(PCH_XHCI_DEV, XHCI_U2OCM2, ehci_2_ocmap);
++ pci_write_config32(PCH_XHCI_DEV, XHCI_U3OCM2, xhci_2_ocmap);
++ }
++}
++
++static uint8_t get_ehci_tune_param_1(const struct usb2_port_config *const port) /* First per-port tuning value, from location and length */
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);
++
++ const enum pch_platform_type plat_type = get_pch_platform_type();
++ const enum usb2_port_location location = port->location;
++ const uint16_t length = port->length; /* NOTE(review): thresholds suggest 0x10 per inch (0x70 = 7") - confirm */
++ if (!is_lp) {
++ if (plat_type == PCH_TYPE_DESKTOP) {
++ if (location == USB_PORT_BACK_PANEL)
++ return 4; /* Back Panel */
++ else
++ return 3; /* Front Panel */
++
++ } else if (plat_type == PCH_TYPE_MOBILE) {
++ if (location == USB_PORT_INTERNAL)
++ return 5; /* Internal Topology */
++ else if (location == USB_PORT_DOCK)
++ return 4; /* Dock */
++ else if (length < 0x70)
++ return 5; /* Back Panel, less than 7" */
++ else
++ return 6; /* Back Panel, 7" or more */
++ }
++ } else {
++ if (location == USB_PORT_BACK_PANEL || location == USB_PORT_MINI_PCIE) {
++ if (length < 0x70)
++ return 5; /* Back Panel, less than 7" */
++ else
++ return 6; /* Back Panel, 7" or more */
++ } else if (location == USB_PORT_DOCK) {
++ return 4; /* Dock */
++ } else {
++ return 5; /* Internal Topology */
++ }
++ }
++ printk(BIOS_ERR, "%s: Unhandled case\n", __func__); /* only reached on non-LP with a platform type other than desktop/mobile */
++ return 0;
++}
++
++static uint8_t get_ehci_tune_param_2(const struct usb2_port_config *const port) /* Second per-port tuning value, from location and length */
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);
++
++ const enum pch_platform_type plat_type = get_pch_platform_type();
++ const enum usb2_port_location location = port->location;
++ const uint16_t length = port->length; /* NOTE(review): thresholds suggest 0x10 per inch (0x80 = 8") - confirm */
++ if (!is_lp) {
++ if (plat_type == PCH_TYPE_DESKTOP) {
++ if (location == USB_PORT_BACK_PANEL) {
++ if (length < 0x80)
++ return 2; /* Back Panel, less than 8" */
++ else if (length < 0x130)
++ return 3; /* Back Panel, 8"-13" */
++ else
++ return 4; /* Back Panel, 13" or more */
++ } else {
++ return 2; /* Front Panel */
++ }
++
++ } else if (plat_type == PCH_TYPE_MOBILE) {
++ if (location == USB_PORT_INTERNAL) {
++ return 2; /* Internal Topology */
++ } else if (location == USB_PORT_DOCK) {
++ if (length < 0x50)
++ return 1; /* Dock, less than 5" */
++ else
++ return 2; /* Dock, 5" or more */
++ } else {
++ if (length < 0x100)
++ return 2; /* Back Panel, less than 10" */
++ else
++ return 3; /* Back Panel, 10" or more */
++ }
++ }
++ } else {
++ if (location == USB_PORT_BACK_PANEL || location == USB_PORT_MINI_PCIE) {
++ if (length < 0x100)
++ return 2; /* Back Panel, less than 10" */
++ else
++ return 3; /* Back Panel, 10" or more */
++ } else if (location == USB_PORT_DOCK) {
++ if (length < 0x50)
++ return 1; /* Dock, less than 5" */
++ else
++ return 2; /* Dock, 5" or more */
++ } else {
++ return 2; /* Internal Topology */
++ }
++ }
++ printk(BIOS_ERR, "%s: Unhandled case\n", __func__); /* only reached on non-LP with a platform type other than desktop/mobile */
++ return 0;
++}
++
++static void program_ehci_port_length(void) /* Write both tuning values for every enabled USB2 port via IOBP */
++{
++ for (unsigned int port = 0; port < physical_port_count(); port++) {
++ if (!mainboard_usb2_ports[port].enable) /* disabled ports keep their current register value */
++ continue;
++ const uint32_t addr = 0xe5004000 + (port + 1) * 0x100; /* per-port register, stride 0x100, first port at 0xe5004100 */
++ const uint8_t param_1 = get_ehci_tune_param_1(&mainboard_usb2_ports[port]);
++ const uint8_t param_2 = get_ehci_tune_param_2(&mainboard_usb2_ports[port]);
++ pch_iobp_update(addr, ~0x7f00, param_2 << 11 | param_1 << 8); /* clear bits 14:8, then param_1 at bit 8 and param_2 at bit 11 */
++ }
++}
++
++void early_usb_init(void) /* Entry point: runs the EHCI and xHCI early init steps in order */
++{
++ /** TODO: Make this configurable? How do the modes affect usbdebug? **/
++ const enum usb_port_route usb_route = ROUTE_TO_XHCI; /* hard-coded, so the ROUTE_TO_EHCI path is currently never taken */
++ ///(pd->boot_mode == 2 && pd->usb_xhci_on_resume) ? ROUTE_TO_XHCI : ROUTE_TO_EHCI;
++
++ common_ehci_hcs_init();
++ xhci_open_memory_space(); /* the calls up to xhci_close_memory_space() access xHCI MMIO through xhci_bar */
++ common_xhci_hc_init();
++ perform_xhci_ehci_switching_flow(usb_route);
++ usb_overcurrent_mapping();
++ program_ehci_port_length();
++ /** FIXME: USB per port control is missing, is it needed? **/
++ xhci_close_memory_space();
++ /** TODO: Close EHCI memory space? **/
++}
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index b5e0c2a830..ad983d86cf 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -115,6 +115,7 @@ enum pch_platform_type {
+
+ void pch_dmi_setup_physical_layer(void);
+ void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm);
++void early_usb_init(void);
+
+ void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+ void usb_ehci_disable(pci_devfn_t dev);
+@@ -202,6 +203,8 @@ void mainboard_config_rcba(void);
+ #define GEN_PMCON_1 0xa0
+ #define SMI_LOCK (1 << 4)
+ #define GEN_PMCON_2 0xa2
++#define GEN_PMCON_2_DISB (1 << 7)
++#define GEN_PMCON_2_MEM_SR (1 << 5)
+ #define SYSTEM_RESET_STS (1 << 4)
+ #define THERMTRIP_STS (1 << 3)
+ #define SYSPWR_FLR (1 << 1)
+@@ -215,6 +218,7 @@ void mainboard_config_rcba(void);
+ #define PMIR 0xac
+ #define PMIR_CF9LOCK (1 << 31)
+ #define PMIR_CF9GR (1 << 20)
++#define PMIR_XHCI_SMART_AUTO (1 << 16) /* c.f. LPT BWG or WPT-LP BIOS spec */
+
+ /* GEN_PMCON_3 bits */
+ #define RTC_BATTERY_DEAD (1 << 2)
+@@ -282,6 +286,20 @@ void mainboard_config_rcba(void);
+ #define SATA_DTLE_DATA_SHIFT 24
+ #define SATA_DTLE_EDGE_SHIFT 16
+
++/*
++ * HCD_INDEX == 2 selects 0:1a.0 (PCH_EHCI2), any other index
++ * selects 0:1d.0 (PCH_EHCI1) for usbdebug use.
++ */
++#if CONFIG_USBDEBUG_HCD_INDEX != 2
++#define PCH_EHCI1_TEMP_BAR0 CONFIG_EHCI_BAR
++#define PCH_EHCI2_TEMP_BAR0 (PCH_EHCI1_TEMP_BAR0 + 0x400)
++#else
++#define PCH_EHCI2_TEMP_BAR0 CONFIG_EHCI_BAR
++#define PCH_EHCI1_TEMP_BAR0 (PCH_EHCI2_TEMP_BAR0 + 0x400)
++#endif
++
++#define PCH_XHCI_TEMP_BAR0 0xe8100000
++
+ /* EHCI PCI Registers */
+ #define EHCI_PWR_CTL_STS 0x54
+ #define PWR_CTL_SET_MASK 0x3
+@@ -289,10 +307,15 @@ void mainboard_config_rcba(void);
+ #define PWR_CTL_SET_D3 0x3
+ #define PWR_CTL_ENABLE_PME (1 << 8)
+ #define PWR_CTL_STATUS_PME (1 << 15)
++#define EHCI_OCMAP 0x74
++#define EHCI_ACCESS_CNTL 0x80
++#define ACCESS_CNTL_ENABLE (1 << 0)
+
+ /* EHCI Memory Registers */
++#define EHCI_HCS_PARAMS 0x04
+ #define EHCI_USB_CMD 0x20
+ #define EHCI_USB_CMD_RUN (1 << 0)
++#define EHCI_USB_CMD_HCRESET (1 << 1)
+ #define EHCI_USB_CMD_PSE (1 << 4)
+ #define EHCI_USB_CMD_ASE (1 << 5)
+ #define EHCI_PORTSC(port) (0x64 + (port) * 4)
+@@ -301,6 +324,10 @@ void mainboard_config_rcba(void);
+
+ /* XHCI PCI Registers */
+ #define XHCI_PWR_CTL_STS 0x74
++#define XHCI_U2OCM1 0xc0
++#define XHCI_U2OCM2 0xc4
++#define XHCI_U3OCM1 0xc8
++#define XHCI_U3OCM2 0xcc
+ #define XHCI_USB2PR 0xd0
+ #define XHCI_USB2PRM 0xd4
+ #define XHCI_USB2PR_HCSEL 0x7fff
+@@ -313,6 +340,27 @@ void mainboard_config_rcba(void);
+ #define XHCI_USB3PDO 0xe8
+
+ /* XHCI Memory Registers */
++#define XHCI_HCS_PARAMS_1 0x04
++#define XHCI_HCS_PARAMS_2 0x08
++#define XHCI_HCS_PARAMS_3 0x0c
++#define XHCI_HCC_PARAMS 0x10
++#define XHCI_USBCMD 0x80
++#define XHCI_USB2_PORTSC(port) (0x480 + ((port) * 0x10))
++#define XHCI_USB2_PORTSC_WPR (1 << 31) /* Warm Port Reset */
++#define XHCI_USB2_PORTSC_CEC (1 << 23) /* Port Config Error Change */
++#define XHCI_USB2_PORTSC_PLC (1 << 22) /* Port Link State Change */
++#define XHCI_USB2_PORTSC_PRC (1 << 21) /* Port Reset Change */
++#define XHCI_USB2_PORTSC_OCC (1 << 20) /* Over-current Change */
++#define XHCI_USB2_PORTSC_WRC (1 << 19) /* Warm Port Reset Change */
++#define XHCI_USB2_PORTSC_PEC (1 << 18) /* Port Enabled Disabled Change */
++#define XHCI_USB2_PORTSC_CSC (1 << 17) /* Connect Status Change */
++#define XHCI_USB2_PORTSC_CHST (0x7f << 17)
++#define XHCI_USB2_PORTSC_LWS (1 << 16) /* Port Link State Write Strobe */
++#define XHCI_USB2_PORTSC_PP (1 << 9)
++#define XHCI_USB2_PORTSC_PR (1 << 4) /* Port Reset */
++#define XHCI_USB2_PORTSC_PED (1 << 1) /* Port Enable/Disabled */
++#define XHCI_USB2_PORTSC_CCS (1 << 0) /* Current Connect Status */
++
+ #define XHCI_USB3_PORTSC(port) ((pch_is_lp() ? 0x510 : 0x570) + ((port) * 0x10))
+ #define XHCI_USB3_PORTSC_CHST (0x7f << 17)
+ #define XHCI_USB3_PORTSC_WCE (1 << 25) /* Wake on Connect */
+@@ -320,6 +368,7 @@ void mainboard_config_rcba(void);
+ #define XHCI_USB3_PORTSC_WOE (1 << 27) /* Wake on Overcurrent */
+ #define XHCI_USB3_PORTSC_WRC (1 << 19) /* Warm Reset Complete */
+ #define XHCI_USB3_PORTSC_LWS (1 << 16) /* Link Write Strobe */
++#define XHCI_USB3_PORTSC_PR (1 << 4) /* Port Reset */
+ #define XHCI_USB3_PORTSC_PED (1 << 1) /* Port Enabled/Disabled */
+ #define XHCI_USB3_PORTSC_WPR (1 << 31) /* Warm Port Reset */
+ #define XHCI_USB3_PORTSC_PLS (0xf << 5) /* Port Link State */
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch b/resources/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch
new file mode 100644
index 00000000..157d2999
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch
@@ -0,0 +1,128 @@
+From 92be49d8422b4bc1c89bb49535f4dc6a01d47295 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 23:22:11 +0200
+Subject: [PATCH 06/26] sb/intel/lynxpoint: Add native thermal init
+
+Implement native thermal initialisation for Lynx Point. This is only
+needed when MRC.bin is not used.
+
+Change-Id: I4a67a3092d0c2e56bfdacb513a899ef838193cbd
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/raminit_native.c | 1 +
+ src/southbridge/intel/lynxpoint/Makefile.inc | 2 +-
+ src/southbridge/intel/lynxpoint/pch.h | 1 +
+ src/southbridge/intel/lynxpoint/thermal.c | 64 +++++++++++++++++++
+ 4 files changed, 67 insertions(+), 1 deletion(-)
+ create mode 100644 src/southbridge/intel/lynxpoint/thermal.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index ef61d4ee09..dd1f1ec14e 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -16,6 +16,7 @@ static bool early_init_native(int s3resume)
+ /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
+ const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
+
++ early_thermal_init();
+ early_usb_init();
+
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc
+index 0e1f2fe4eb..a9a9b153d6 100644
+--- a/src/southbridge/intel/lynxpoint/Makefile.inc
++++ b/src/southbridge/intel/lynxpoint/Makefile.inc
+@@ -37,7 +37,7 @@ bootblock-y += early_pch.c
+ romstage-y += early_usb.c early_me.c me_status.c early_pch.c
+ romstage-y += pmutil.c
+
+-romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c
++romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c thermal.c
+
+ ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
+ romstage-y += lp_gpio.c
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index ad983d86cf..38a9349220 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -116,6 +116,7 @@ enum pch_platform_type {
+ void pch_dmi_setup_physical_layer(void);
+ void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm);
+ void early_usb_init(void);
++void early_thermal_init(void);
+
+ void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+ void usb_ehci_disable(pci_devfn_t dev);
+diff --git a/src/southbridge/intel/lynxpoint/thermal.c b/src/southbridge/intel/lynxpoint/thermal.c
+new file mode 100644
+index 0000000000..e71969ea0c
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/thermal.c
+@@ -0,0 +1,64 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/mmio.h>
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <types.h>
++
++#define TBARB_TEMP 0x40000000 /* Address used for the temporary thermal BAR during early init */
++
++#define THERMAL_DEV PCI_DEV(0, 0x1f, 6) /* Lynx Point thermal subsystem device, D31:F6 */
++
++/* Early thermal init via a temporary BAR; it may need to be done prior to giving ME its memory */
++void early_thermal_init(void)
++{
++ /* Program address for temporary BAR */
++ pci_write_config32(THERMAL_DEV, 0x40, TBARB_TEMP);
++ pci_write_config32(THERMAL_DEV, 0x44, 0); /* presumably the upper half of the BAR address */
++
++ /* Activate temporary BAR */
++ pci_or_config32(THERMAL_DEV, 0x40, 1);
++
++ /*
++ * BWG section 17.3.1 says:
++ *
++ * ### Initializing Lynx Point Thermal Sensors ###
++ *
++ * The System BIOS must perform the following steps to initialize the Lynx
++ * Point thermal subsystem device, D31:F6. The System BIOS is required to
++ * repeat this process on a resume from Sx. BIOS may enable any or all of
++ * the registers below based on OEM's platform configuration. Intel does
++ * not recommend a value on some of the registers, since each platform has
++ * different temperature trip points and one may enable a trip to cause an
++ * SMI while another platform would cause an interrupt instead.
++ *
++ * The recommended flow for enabling thermal sensor is by setting up various
++ * temperature trip points first, followed by enabling the desired trip
++ * alert method and then enable the actual sensors from TSEL registers.
++ * If this flow is not followed, software will need to take special care
++ * to handle false events during setting up those registers.
++ */
++
++ /* Step 1: Program CTT */
++ write16p(TBARB_TEMP + 0x10, 0x0154); /* CTT */
++
++ /* Step 2: Clear trip status from TSS and TAS */
++ write8p(TBARB_TEMP + 0x06, 0xff); /* TSS */
++ write8p(TBARB_TEMP + 0x80, 0xff); /* TAS */
++
++ /* Step 3: Program TSGPEN and TSPIEN to zero */
++ write8p(TBARB_TEMP + 0x84, 0x00); /* TSGPEN */
++ write8p(TBARB_TEMP + 0x82, 0x00); /* TSPIEN */
++
++ /*
++ * Step 4: If thermal reporting to an EC over SMBus is supported,
++ * then write 0x01 to TSREL, else leave at default.
++ */
++ write8p(TBARB_TEMP + 0x0a, 0x01); /* TSREL; note the write is done unconditionally here */
++
++ /* Disable temporary BAR */
++ pci_and_config32(THERMAL_DEV, 0x40, ~1);
++
++ /* Clear temporary BAR address */
++ pci_write_config32(THERMAL_DEV, 0x40, 0);
++}
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0007-sb-intel-lynxpoint-Add-native-PCH-init.patch b/resources/coreboot/haswell/patches/0007-sb-intel-lynxpoint-Add-native-PCH-init.patch
new file mode 100644
index 00000000..74427f5d
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0007-sb-intel-lynxpoint-Add-native-PCH-init.patch
@@ -0,0 +1,785 @@
+From 7378cb4fefc87b9a096bb14820a44f26f3a628f5 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 23:43:46 +0200
+Subject: [PATCH 07/26] sb/intel/lynxpoint: Add native PCH init
+
+Implement native PCH initialisation for Lynx Point. This is only needed
+when MRC.bin is not used.
+
+Change-Id: I36867bdc8b20000e44ff9d0d7b2c0d63952bd561
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/raminit_native.c | 3 +-
+ src/southbridge/intel/lynxpoint/Makefile.inc | 1 +
+ .../intel/lynxpoint/early_pch_native.c | 123 +++++++++
+ .../intel/lynxpoint/hsio/Makefile.inc | 8 +
+ src/southbridge/intel/lynxpoint/hsio/common.c | 52 ++++
+ src/southbridge/intel/lynxpoint/hsio/hsio.h | 46 ++++
+ .../intel/lynxpoint/hsio/lpt_h_cx.c | 244 ++++++++++++++++++
+ .../intel/lynxpoint/hsio/lpt_lp_bx.c | 180 +++++++++++++
+ src/southbridge/intel/lynxpoint/pch.h | 6 +
+ 9 files changed, 661 insertions(+), 2 deletions(-)
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/Makefile.inc
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/common.c
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/hsio.h
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index dd1f1ec14e..b6efb6b40d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -16,8 +16,7 @@ static bool early_init_native(int s3resume)
+ /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
+ const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
+
+- early_thermal_init();
+- early_usb_init();
++ early_pch_init_native(s3resume);
+
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+ dmi_early_init();
+diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc
+index a9a9b153d6..63243ecc86 100644
+--- a/src/southbridge/intel/lynxpoint/Makefile.inc
++++ b/src/southbridge/intel/lynxpoint/Makefile.inc
+@@ -38,6 +38,7 @@ romstage-y += early_usb.c early_me.c me_status.c early_pch.c
+ romstage-y += pmutil.c
+
+ romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c thermal.c
++subdirs-$(CONFIG_USE_NATIVE_RAMINIT) += hsio
+
+ ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
+ romstage-y += lp_gpio.c
+diff --git a/src/southbridge/intel/lynxpoint/early_pch_native.c b/src/southbridge/intel/lynxpoint/early_pch_native.c
+index c28ddfcf5d..421821fa5d 100644
+--- a/src/southbridge/intel/lynxpoint/early_pch_native.c
++++ b/src/southbridge/intel/lynxpoint/early_pch_native.c
+@@ -1,10 +1,133 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
+ #include <console/console.h>
++#include <device/pci_def.h>
+ #include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/hsio/hsio.h>
+ #include <southbridge/intel/lynxpoint/pch.h>
+ #include <types.h>
+
++static void early_sata_init(const uint8_t pch_revision)
++{
++ /* Early SATA setup following the BWG steps; pch_revision gates the mPHY table programming */
++ const bool is_mobile = get_pch_platform_type() != PCH_TYPE_DESKTOP; /* i.e. "not desktop" */
++ const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410);
++ printk(BIOS_DEBUG, "HSIO lane owner: 0x%02x\n", lane_owner);
++
++ /* BWG Step 2 */
++ pci_update_config32(PCH_SATA_DEV, SATA_SCLKG, ~0x1ff, 0x183);
++
++ /* BWG Step 3: Set OOB Retry Mode */
++ pci_or_config16(PCH_SATA_DEV, SATA_PCS, 1 << 15);
++
++ /* BWG Step 4: Program the SATA mPHY tables (an unsupported stepping is only logged) */
++ if (pch_is_lp()) {
++ if (pch_revision >= LPT_LP_STEP_B0 && pch_revision <= LPT_LP_STEP_B2) {
++ program_hsio_sata_lpt_lp_bx(is_mobile);
++ } else {
++ printk(BIOS_ERR, "Unsupported PCH-LP stepping 0x%02x\n", pch_revision);
++ }
++ } else {
++ if (pch_revision >= LPT_H_STEP_C0) {
++ program_hsio_sata_lpt_h_cx(is_mobile);
++ } else {
++ printk(BIOS_ERR, "Unsupported PCH-H stepping 0x%02x\n", pch_revision);
++ }
++ }
++
++ /** FIXME: Program SATA RxEq tables **/
++
++ /* BWG Step 5 */
++ /** FIXME: Only for desktop and mobile (skip this on workstation and server) **/
++ pci_or_config32(PCH_SATA_DEV, 0x98, BIT(22));
++
++ /* BWG Step 6 */
++ pci_or_config32(PCH_SATA_DEV, 0x98, BIT(19));
++
++ /* BWG Step 7 */
++ pci_update_config32(PCH_SATA_DEV, 0x98, ~(0x3f << 7), 0x04 << 7);
++
++ /* BWG Step 8 */
++ pci_or_config32(PCH_SATA_DEV, 0x98, BIT(20));
++
++ /* BWG Step 9 */
++ pci_update_config32(PCH_SATA_DEV, 0x98, ~(3 << 5), 1 << 5);
++
++ /* BWG Step 10 */
++ pci_or_config32(PCH_SATA_DEV, 0x98, BIT(18));
++
++ /* Enable SATA ports */
++ uint8_t sata_pcs = 0;
++ if (CONFIG(INTEL_LYNXPOINT_LP)) {
++ for (uint8_t i = 0; i < 4; i++) { /* LP: port i is tied to lane owner bit (7 - i) */
++ if ((lane_owner & BIT(7 - i)) == 0) {
++ sata_pcs |= BIT(i);
++ }
++ }
++ } else {
++ sata_pcs |= 0x0f; /* Ports 0-3 are enabled regardless of the lane owner value */
++ for (uint8_t i = 4; i < 6; i++) { /* Ports 4-5 only when lane owner bit i is clear */
++ if ((lane_owner & BIT(i)) == 0) {
++ sata_pcs |= BIT(i);
++ }
++ }
++ }
++ printk(BIOS_DEBUG, "SATA port enables: 0x%02x\n", sata_pcs);
++ pci_or_config8(PCH_SATA_DEV, SATA_PCS, sata_pcs); /* ORs the enables into the low byte */
++}
++
++void early_pch_init_native(int s3resume)
++{
++ /* Native early PCH init entry point; s3resume is not referenced by this function yet */
++ const uint8_t pch_revision = pci_read_config8(PCH_LPC_DEV, PCI_REVISION_ID);
++ RCBA16(DISPBDF) = 0x0010;
++ RCBA32_OR(FD2, PCH_ENABLE_DBDF);
++
++ /** FIXME: Check GEN_PMCON_3 and handle RTC failure? **/
++
++ RCBA32(PRSTS) = BIT(4);
++
++ early_sata_init(pch_revision);
++
++ pci_or_config8(PCH_LPC_DEV, 0xa6, 1 << 1);
++ pci_and_config8(PCH_LPC_DEV, 0xdc, ~(1 << 5 | 1 << 1));
++
++ /** TODO: Send GET HSIO VER and update ChipsetInit table? Is it needed? **/
++
++ /** FIXME: GbE handling? **/
++
++ pci_update_config32(PCH_LPC_DEV, 0xac, ~(1 << 20), 0);
++
++ for (uint8_t i = 0; i < 8; i++) /* All eight functions of device 0x1c */
++ pci_update_config32(PCI_DEV(0, 0x1c, i), 0x338, ~(1 << 26), 0);
++
++ pci_update_config8(PCI_DEV(0, 0x1c, 0), 0xf4, ~(3 << 5), 1 << 7);
++
++ pci_update_config8(PCI_DEV(0, 26, 0), 0x88, ~(1 << 2), 0);
++ pci_update_config8(PCI_DEV(0, 29, 0), 0x88, ~(1 << 2), 0);
++
++ /** FIXME: Disable SATA2 device? **/
++
++ if (pch_is_lp()) { /* Same stepping gates as in early_sata_init(); unsupported is only logged */
++ if (pch_revision >= LPT_LP_STEP_B0 && pch_revision <= LPT_LP_STEP_B2) {
++ program_hsio_xhci_lpt_lp_bx();
++ program_hsio_igbe_lpt_lp_bx();
++ } else {
++ printk(BIOS_ERR, "Unsupported PCH-LP stepping 0x%02x\n", pch_revision);
++ }
++ } else {
++ if (pch_revision >= LPT_H_STEP_C0) {
++ program_hsio_xhci_lpt_h_cx();
++ program_hsio_igbe_lpt_h_cx();
++ } else {
++ printk(BIOS_ERR, "Unsupported PCH-H stepping 0x%02x\n", pch_revision);
++ }
++ }
++
++ early_thermal_init(); /* Thermal and USB init run last, after the HSIO programming above */
++ early_usb_init();
++}
++
+ void pch_dmi_setup_physical_layer(void)
+ {
+ /* FIXME: We need to make sure the SA supports Gen2 as well */
+diff --git a/src/southbridge/intel/lynxpoint/hsio/Makefile.inc b/src/southbridge/intel/lynxpoint/hsio/Makefile.inc
+new file mode 100644
+index 0000000000..6b74997511
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/Makefile.inc
+@@ -0,0 +1,8 @@
++## SPDX-License-Identifier: GPL-2.0-or-later
++
++romstage-y += common.c
++ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
++romstage-y += lpt_lp_bx.c
++else
++romstage-y += lpt_h_cx.c
++endif
+diff --git a/src/southbridge/intel/lynxpoint/hsio/common.c b/src/southbridge/intel/lynxpoint/hsio/common.c
+new file mode 100644
+index 0000000000..9935ca347a
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/common.c
+@@ -0,0 +1,52 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/hsio/hsio.h>
++#include <types.h>
++
++/*
++ * Update a shared SATA lane register, unless that lane's owner bit is set.
++ * FIXME: Ask Intel whether all lanes need to be programmed as specified
++ * in the PCH BWG. If not, make separate tables and only check this once.
++ */
++void hsio_sata_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or)
++{
++	const uint8_t owners = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410);
++	const uint32_t lane = addr & 0xfe00;
++	uint8_t owner_bit = 0;
++
++	if (lane == 0x2000)
++		owner_bit = 1 << 4;
++	else if (lane == 0x2200)
++		owner_bit = 1 << 5;
++	else if (CONFIG(INTEL_LYNXPOINT_LP) && lane == 0x2400)
++		owner_bit = 1 << 6;
++	else if (CONFIG(INTEL_LYNXPOINT_LP) && lane == 0x2600)
++		owner_bit = 1 << 7;
++
++	if (!(owners & owner_bit))
++		hsio_update(addr, and, or);
++}
++
++/*
++ * Update a shared xHCI lane register, but only when the 2-bit owner field
++ * belonging to that lane reads 2; registers of other lanes are always updated.
++ * FIXME: Ask Intel whether all lanes need to be programmed as specified
++ * in the PCH BWG. If not, make separate tables and only check this once.
++ */
++void hsio_xhci_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or)
++{
++	const uint8_t owners = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410);
++	const uint32_t lane = addr & 0xfe00;
++	/* Lane paired with owner bits 1:0, then the lane paired with owner bits 3:2 */
++	const uint32_t lane_lo = CONFIG(INTEL_LYNXPOINT_LP) ? 0x2400 : 0x2e00;
++	const uint32_t lane_hi = CONFIG(INTEL_LYNXPOINT_LP) ? 0x2600 : 0x2c00;
++
++	if (lane == lane_lo && ((owners >> 0) & 3) != 2)
++		return;
++
++	if (lane == lane_hi && ((owners >> 2) & 3) != 2)
++		return;
++
++	hsio_update(addr, and, or);
++}
+diff --git a/src/southbridge/intel/lynxpoint/hsio/hsio.h b/src/southbridge/intel/lynxpoint/hsio/hsio.h
+new file mode 100644
+index 0000000000..689ef4a05b
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/hsio.h
+@@ -0,0 +1,46 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef SOUTHBRIDGE_INTEL_LYNXPOINT_HSIO_H
++#define SOUTHBRIDGE_INTEL_LYNXPOINT_HSIO_H
++
++#include <southbridge/intel/lynxpoint/iobp.h>
++#include <types.h>
++
++struct hsio_table_row { /* One HSIO register update, applied via hsio_update_row() and friends */
++ uint32_t addr; /* Register address, forwarded unchanged to pch_iobp_update() */
++ uint32_t and; /* Forwarded as the "and" argument (presumably the bits to keep) */
++ uint32_t or; /* Forwarded as the "or" argument (presumably the bits to set) */
++};
++
++static inline void hsio_update(const uint32_t addr, const uint32_t and, const uint32_t or)
++{
++ pch_iobp_update(addr, and, or); /* HSIO updates are a plain alias for pch_iobp_update() */
++}
++
++static inline void hsio_update_row(const struct hsio_table_row row)
++{
++ hsio_update(row.addr, row.and, row.or); /* Apply one table row unconditionally */
++}
++
++void hsio_xhci_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or);
++void hsio_sata_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or);
++
++static inline void hsio_sata_shared_update_row(const struct hsio_table_row row)
++{
++ hsio_sata_shared_update(row.addr, row.and, row.or); /* May be skipped by the lane owner check */
++}
++
++static inline void hsio_xhci_shared_update_row(const struct hsio_table_row row)
++{
++ hsio_xhci_shared_update(row.addr, row.and, row.or); /* May be skipped by the lane owner check */
++}
++
++void program_hsio_sata_lpt_h_cx(const bool is_mobile); /* Non-LP PCH, defined in lpt_h_cx.c */
++void program_hsio_xhci_lpt_h_cx(void);
++void program_hsio_igbe_lpt_h_cx(void);
++
++void program_hsio_sata_lpt_lp_bx(const bool is_mobile); /* LP PCH, defined in lpt_lp_bx.c */
++void program_hsio_xhci_lpt_lp_bx(void);
++void program_hsio_igbe_lpt_lp_bx(void);
++
++#endif
+diff --git a/src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c b/src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c
+new file mode 100644
+index 0000000000..b5dd402742
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c
+@@ -0,0 +1,244 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/hsio/hsio.h>
++#include <types.h>
++
++static const struct hsio_table_row hsio_sata_shared_lpt_h_cx[] = { /* { addr, and, or }, file-local */
++ { 0xea002008, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002208, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002038, ~0x3f00000f, 0x0700000d },
++ { 0xea002238, ~0x3f00000f, 0x0700000d },
++ { 0xea00202c, ~0x00020f00, 0x00020100 },
++ { 0xea00222c, ~0x00020f00, 0x00020100 },
++ { 0xea002040, ~0x1f000000, 0x01000000 },
++ { 0xea002240, ~0x1f000000, 0x01000000 },
++ { 0xea002010, ~0xffff0000, 0x0d510000 },
++ { 0xea002210, ~0xffff0000, 0x0d510000 },
++ { 0xea002018, ~0xffff0300, 0x38250100 },
++ { 0xea002218, ~0xffff0300, 0x38250100 },
++ { 0xea002000, ~0xcf030000, 0xcf030000 },
++ { 0xea002200, ~0xcf030000, 0xcf030000 },
++ { 0xea002028, ~0xff1f0000, 0x580e0000 },
++ { 0xea002228, ~0xff1f0000, 0x580e0000 },
++ { 0xea00201c, ~0x00007c00, 0x00002400 },
++ { 0xea00221c, ~0x00007c00, 0x00002400 },
++ { 0xea00208c, ~0x00ff0000, 0x00800000 },
++ { 0xea00228c, ~0x00ff0000, 0x00800000 },
++ { 0xea0020a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0022a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0020ac, ~0x00000030, 0x00000020 },
++ { 0xea0022ac, ~0x00000030, 0x00000020 },
++ { 0xea002140, ~0x00ffffff, 0x00140718 },
++ { 0xea002340, ~0x00ffffff, 0x00140718 },
++ { 0xea002144, ~0x00ffffff, 0x00140998 },
++ { 0xea002344, ~0x00ffffff, 0x00140998 },
++ { 0xea002148, ~0x00ffffff, 0x00140998 },
++ { 0xea002348, ~0x00ffffff, 0x00140998 },
++ { 0xea00217c, ~0x03000000, 0x03000000 },
++ { 0xea00237c, ~0x03000000, 0x03000000 },
++ { 0xea002178, ~0x00001f00, 0x00001800 },
++ { 0xea002378, ~0x00001f00, 0x00001800 },
++ { 0xea00210c, ~0x0038000f, 0x00000005 },
++ { 0xea00230c, ~0x0038000f, 0x00000005 },
++};
++
++static const struct hsio_table_row hsio_sata_lpt_h_cx[] = { /* { addr, and, or }, file-local */
++ { 0xea008008, ~0xff000000, 0x1c000000 },
++ { 0xea002408, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002608, ~0xfffc6108, 0xea6c6108 },
++ { 0xea000808, ~0xfffc6108, 0xea6c6108 },
++ { 0xea000a08, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002438, ~0x3f00000f, 0x0700000d },
++ { 0xea002638, ~0x3f00000f, 0x0700000d },
++ { 0xea000838, ~0x3f00000f, 0x0700000d },
++ { 0xea000a38, ~0x3f00000f, 0x0700000d },
++ { 0xea002440, ~0x1f000000, 0x01000000 },
++ { 0xea002640, ~0x1f000000, 0x01000000 },
++ { 0xea000840, ~0x1f000000, 0x01000000 },
++ { 0xea000a40, ~0x1f000000, 0x01000000 },
++ { 0xea002410, ~0xffff0000, 0x0d510000 },
++ { 0xea002610, ~0xffff0000, 0x0d510000 },
++ { 0xea000810, ~0xffff0000, 0x0d510000 },
++ { 0xea000a10, ~0xffff0000, 0x0d510000 },
++ { 0xea00242c, ~0x00020800, 0x00020000 },
++ { 0xea00262c, ~0x00020800, 0x00020000 },
++ { 0xea00082c, ~0x00020800, 0x00020000 },
++ { 0xea000a2c, ~0x00020800, 0x00020000 },
++ { 0xea002418, ~0xffff0300, 0x38250100 },
++ { 0xea002618, ~0xffff0300, 0x38250100 },
++ { 0xea000818, ~0xffff0300, 0x38250100 },
++ { 0xea000a18, ~0xffff0300, 0x38250100 },
++ { 0xea002400, ~0xcf030000, 0xcf030000 },
++ { 0xea002600, ~0xcf030000, 0xcf030000 },
++ { 0xea000800, ~0xcf030000, 0xcf030000 },
++ { 0xea000a00, ~0xcf030000, 0xcf030000 },
++ { 0xea002428, ~0xff1f0000, 0x580e0000 },
++ { 0xea002628, ~0xff1f0000, 0x580e0000 },
++ { 0xea000828, ~0xff1f0000, 0x580e0000 },
++ { 0xea000a28, ~0xff1f0000, 0x580e0000 },
++ { 0xea00241c, ~0x00007c00, 0x00002400 },
++ { 0xea00261c, ~0x00007c00, 0x00002400 },
++ { 0xea00081c, ~0x00007c00, 0x00002400 },
++ { 0xea000a1c, ~0x00007c00, 0x00002400 },
++ { 0xea00248c, ~0x00ff0000, 0x00800000 },
++ { 0xea00268c, ~0x00ff0000, 0x00800000 },
++ { 0xea00088c, ~0x00ff0000, 0x00800000 },
++ { 0xea000a8c, ~0x00ff0000, 0x00800000 },
++ { 0xea0024a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0026a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0008a4, ~0x0030ff00, 0x00308300 },
++ { 0xea000aa4, ~0x0030ff00, 0x00308300 },
++ { 0xea0024ac, ~0x00000030, 0x00000020 },
++ { 0xea0026ac, ~0x00000030, 0x00000020 },
++ { 0xea0008ac, ~0x00000030, 0x00000020 },
++ { 0xea000aac, ~0x00000030, 0x00000020 },
++ { 0xea002540, ~0x00ffffff, 0x00140718 },
++ { 0xea002740, ~0x00ffffff, 0x00140718 },
++ { 0xea000940, ~0x00ffffff, 0x00140718 },
++ { 0xea000b40, ~0x00ffffff, 0x00140718 },
++ { 0xea002544, ~0x00ffffff, 0x00140998 },
++ { 0xea002744, ~0x00ffffff, 0x00140998 },
++ { 0xea000944, ~0x00ffffff, 0x00140998 },
++ { 0xea000b44, ~0x00ffffff, 0x00140998 },
++ { 0xea002548, ~0x00ffffff, 0x00140998 },
++ { 0xea002748, ~0x00ffffff, 0x00140998 },
++ { 0xea000948, ~0x00ffffff, 0x00140998 },
++ { 0xea000b48, ~0x00ffffff, 0x00140998 },
++ { 0xea00257c, ~0x03000000, 0x03000000 },
++ { 0xea00277c, ~0x03000000, 0x03000000 },
++ { 0xea00097c, ~0x03000000, 0x03000000 },
++ { 0xea000b7c, ~0x03000000, 0x03000000 },
++ { 0xea002578, ~0x00001f00, 0x00001800 },
++ { 0xea002778, ~0x00001f00, 0x00001800 },
++ { 0xea000978, ~0x00001f00, 0x00001800 },
++ { 0xea000b78, ~0x00001f00, 0x00001800 },
++ { 0xea00250c, ~0x0038000f, 0x00000005 },
++ { 0xea00270c, ~0x0038000f, 0x00000005 },
++ { 0xea00090c, ~0x0038000f, 0x00000005 },
++ { 0xea000b0c, ~0x0038000f, 0x00000005 },
++};
++
++static const struct hsio_table_row hsio_xhci_shared_lpt_h_cx[] = { /* { addr, and, or }, file-local */
++ { 0xe9002c2c, ~0x00000700, 0x00000100 },
++ { 0xe9002e2c, ~0x00000700, 0x00000100 },
++ { 0xe9002dcc, ~0x00001407, 0x00001407 },
++ { 0xe9002fcc, ~0x00001407, 0x00001407 },
++ { 0xe9002d68, ~0x01000f3c, 0x00000a28 },
++ { 0xe9002f68, ~0x01000f3c, 0x00000a28 },
++ { 0xe9002d6c, ~0x000000ff, 0x0000003f },
++ { 0xe9002f6c, ~0x000000ff, 0x0000003f },
++ { 0xe9002d4c, ~0x00ffff00, 0x00120500 },
++ { 0xe9002f4c, ~0x00ffff00, 0x00120500 },
++ { 0xe9002d14, ~0x38000700, 0x00000100 },
++ { 0xe9002f14, ~0x38000700, 0x00000100 },
++ { 0xe9002d64, ~0x0000f000, 0x00005000 },
++ { 0xe9002f64, ~0x0000f000, 0x00005000 },
++ { 0xe9002d70, ~0x00000018, 0x00000000 },
++ { 0xe9002f70, ~0x00000018, 0x00000000 },
++ { 0xe9002c38, ~0x3f00000f, 0x0700000b },
++ { 0xe9002e38, ~0x3f00000f, 0x0700000b },
++ { 0xe9002d40, ~0x00800000, 0x00000000 },
++ { 0xe9002f40, ~0x00800000, 0x00000000 },
++};
++
++static const struct hsio_table_row hsio_xhci_lpt_h_cx[] = { /* { addr, and, or }, file-local */
++ { 0xe90031cc, ~0x00001407, 0x00001407 },
++ { 0xe90033cc, ~0x00001407, 0x00001407 },
++ { 0xe90015cc, ~0x00001407, 0x00001407 },
++ { 0xe90017cc, ~0x00001407, 0x00001407 },
++ { 0xe9003168, ~0x01000f3c, 0x00000a28 },
++ { 0xe9003368, ~0x01000f3c, 0x00000a28 },
++ { 0xe9001568, ~0x01000f3c, 0x00000a28 },
++ { 0xe9001768, ~0x01000f3c, 0x00000a28 },
++ { 0xe900316c, ~0x000000ff, 0x0000003f },
++ { 0xe900336c, ~0x000000ff, 0x0000003f },
++ { 0xe900156c, ~0x000000ff, 0x0000003f },
++ { 0xe900176c, ~0x000000ff, 0x0000003f },
++ { 0xe900314c, ~0x00ffff00, 0x00120500 },
++ { 0xe900334c, ~0x00ffff00, 0x00120500 },
++ { 0xe900154c, ~0x00ffff00, 0x00120500 },
++ { 0xe900174c, ~0x00ffff00, 0x00120500 },
++ { 0xe9003114, ~0x38000700, 0x00000100 },
++ { 0xe9003314, ~0x38000700, 0x00000100 },
++ { 0xe9001514, ~0x38000700, 0x00000100 },
++ { 0xe9001714, ~0x38000700, 0x00000100 },
++ { 0xe9003164, ~0x0000f000, 0x00005000 },
++ { 0xe9003364, ~0x0000f000, 0x00005000 },
++ { 0xe9001564, ~0x0000f000, 0x00005000 },
++ { 0xe9001764, ~0x0000f000, 0x00005000 },
++ { 0xe9003170, ~0x00000018, 0x00000000 },
++ { 0xe9003370, ~0x00000018, 0x00000000 },
++ { 0xe9001570, ~0x00000018, 0x00000000 },
++ { 0xe9001770, ~0x00000018, 0x00000000 },
++ { 0xe9003038, ~0x3f00000f, 0x0700000b },
++ { 0xe9003238, ~0x3f00000f, 0x0700000b },
++ { 0xe9001438, ~0x3f00000f, 0x0700000b },
++ { 0xe9001638, ~0x3f00000f, 0x0700000b },
++ { 0xe9003140, ~0x00800000, 0x00000000 },
++ { 0xe9003340, ~0x00800000, 0x00000000 },
++ { 0xe9001540, ~0x00800000, 0x00000000 },
++ { 0xe9001740, ~0x00800000, 0x00000000 },
++};
++
++void program_hsio_sata_lpt_h_cx(const bool is_mobile)
++{
++	/* Rows of the non-shared table are written unconditionally */
++	for (size_t row = 0; row < ARRAY_SIZE(hsio_sata_lpt_h_cx); row++)
++		hsio_update_row(hsio_sata_lpt_h_cx[row]);
++
++	/* Rows of the shared table go through the lane owner check */
++	for (size_t row = 0; row < ARRAY_SIZE(hsio_sata_shared_lpt_h_cx); row++)
++		hsio_sata_shared_update_row(hsio_sata_shared_lpt_h_cx[row]);
++
++	/* The low 16 bits of each lane's register 0x90 depend on the platform type */
++	uint32_t reg90_bits;
++	if (is_mobile)
++		reg90_bits = 0x00004c5a;
++	else
++		reg90_bits = 0x00003e67;
++
++	hsio_update(0xea002490, ~0x0000ffff, reg90_bits);
++	hsio_update(0xea002690, ~0x0000ffff, reg90_bits);
++	hsio_update(0xea000890, ~0x0000ffff, reg90_bits);
++	hsio_update(0xea000a90, ~0x0000ffff, reg90_bits);
++
++	hsio_sata_shared_update(0xea002090, ~0x0000ffff, reg90_bits);
++	hsio_sata_shared_update(0xea002290, ~0x0000ffff, reg90_bits);
++}
++
++void program_hsio_xhci_lpt_h_cx(void)
++{
++	const size_t num_plain = ARRAY_SIZE(hsio_xhci_lpt_h_cx);
++	const size_t num_shared = ARRAY_SIZE(hsio_xhci_shared_lpt_h_cx);
++	size_t row;
++
++	/* First the table whose rows are applied unconditionally... */
++	for (row = 0; row < num_plain; row++)
++		hsio_update_row(hsio_xhci_lpt_h_cx[row]);
++
++	/*
++	 * ...then the shared-lane table, where hsio_xhci_shared_update()
++	 * decides per row whether the write actually happens.
++	 */
++	for (row = 0; row < num_shared; row++)
++		hsio_xhci_shared_update_row(hsio_xhci_shared_lpt_h_cx[row]);
++}
++
++void program_hsio_igbe_lpt_h_cx(void)
++{
++ /* Update register 0x08 of the lane picked by bits 18:16 of config 0xfc, if bit 19 is set */
++ const uint32_t strpfusecfg1 = pci_read_config32(PCI_DEV(0, 0x1c, 0), 0xfc);
++ if (!(strpfusecfg1 & (1 << 19)))
++ return;
++ const uint8_t gbe_port = (strpfusecfg1 >> 16) & 0x7;
++ const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410);
++ /* Ports 0 and 1 are additionally skipped unless their 2-bit lane owner field reads 1 */
++ if (gbe_port == 0 && ((lane_owner >> 0) & 3) != 1)
++ return;
++ if (gbe_port == 1 && ((lane_owner >> 2) & 3) != 1)
++ return;
++ /* Unsigned literal on purpose: `0xe900 << 16` does not fit in a signed int */
++ const uint32_t gbe_hsio_base = 0xe9000000 | (0x2e - 2 * gbe_port) << 8;
++ hsio_update(gbe_hsio_base + 0x08, ~0xf0000100, 0xe0000100);
++}
+diff --git a/src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c b/src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c
+new file mode 100644
+index 0000000000..24679e791a
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c
+@@ -0,0 +1,180 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/iobp.h>
++#include <southbridge/intel/lynxpoint/hsio/hsio.h>
++#include <types.h>
++
++static const struct hsio_table_row hsio_sata_shared_lpt_lp_bx[] = { /* { addr, and, or }, file-local */
++ { 0xea008008, ~0xff000000, 0x1c000000 },
++ { 0xea002008, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002208, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002408, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002608, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002038, ~0x0000000f, 0x0000000d },
++ { 0xea002238, ~0x0000000f, 0x0000000d },
++ { 0xea002438, ~0x0000000f, 0x0000000d },
++ { 0xea002638, ~0x0000000f, 0x0000000d },
++ { 0xea00202c, ~0x00020f00, 0x00020100 },
++ { 0xea00222c, ~0x00020f00, 0x00020100 },
++ { 0xea00242c, ~0x00020f00, 0x00020100 },
++ { 0xea00262c, ~0x00020f00, 0x00020100 },
++ { 0xea002040, ~0x1f000000, 0x01000000 },
++ { 0xea002240, ~0x1f000000, 0x01000000 },
++ { 0xea002440, ~0x1f000000, 0x01000000 },
++ { 0xea002640, ~0x1f000000, 0x01000000 },
++ { 0xea002010, ~0xffff0000, 0x55510000 },
++ { 0xea002210, ~0xffff0000, 0x55510000 },
++ { 0xea002410, ~0xffff0000, 0x55510000 },
++ { 0xea002610, ~0xffff0000, 0x55510000 },
++ { 0xea002140, ~0x00ffffff, 0x00140718 },
++ { 0xea002340, ~0x00ffffff, 0x00140718 },
++ { 0xea002540, ~0x00ffffff, 0x00140718 },
++ { 0xea002740, ~0x00ffffff, 0x00140718 },
++ { 0xea002144, ~0x00ffffff, 0x00140998 },
++ { 0xea002344, ~0x00ffffff, 0x00140998 },
++ { 0xea002544, ~0x00ffffff, 0x00140998 },
++ { 0xea002744, ~0x00ffffff, 0x00140998 },
++ { 0xea002148, ~0x00ffffff, 0x00140998 },
++ { 0xea002348, ~0x00ffffff, 0x00140998 },
++ { 0xea002548, ~0x00ffffff, 0x00140998 },
++ { 0xea002748, ~0x00ffffff, 0x00140998 },
++ { 0xea00217c, ~0x03000000, 0x03000000 },
++ { 0xea00237c, ~0x03000000, 0x03000000 },
++ { 0xea00257c, ~0x03000000, 0x03000000 },
++ { 0xea00277c, ~0x03000000, 0x03000000 },
++ { 0xea00208c, ~0x00ff0000, 0x00800000 },
++ { 0xea00228c, ~0x00ff0000, 0x00800000 },
++ { 0xea00248c, ~0x00ff0000, 0x00800000 },
++ { 0xea00268c, ~0x00ff0000, 0x00800000 },
++ { 0xea0020a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0022a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0024a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0026a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0020ac, ~0x00000030, 0x00000020 },
++ { 0xea0022ac, ~0x00000030, 0x00000020 },
++ { 0xea0024ac, ~0x00000030, 0x00000020 },
++ { 0xea0026ac, ~0x00000030, 0x00000020 },
++ { 0xea002018, ~0xffff0300, 0x38250100 },
++ { 0xea002218, ~0xffff0300, 0x38250100 },
++ { 0xea002418, ~0xffff0300, 0x38250100 },
++ { 0xea002618, ~0xffff0300, 0x38250100 },
++ { 0xea002000, ~0xcf030000, 0xcf030000 },
++ { 0xea002200, ~0xcf030000, 0xcf030000 },
++ { 0xea002400, ~0xcf030000, 0xcf030000 },
++ { 0xea002600, ~0xcf030000, 0xcf030000 },
++ { 0xea002028, ~0xff1f0000, 0x580e0000 },
++ { 0xea002228, ~0xff1f0000, 0x580e0000 },
++ { 0xea002428, ~0xff1f0000, 0x580e0000 },
++ { 0xea002628, ~0xff1f0000, 0x580e0000 },
++ { 0xea00201c, ~0x00007c00, 0x00002400 },
++ { 0xea00221c, ~0x00007c00, 0x00002400 },
++ { 0xea00241c, ~0x00007c00, 0x00002400 },
++ { 0xea00261c, ~0x00007c00, 0x00002400 },
++ { 0xea002178, ~0x00001f00, 0x00001800 },
++ { 0xea002378, ~0x00001f00, 0x00001800 },
++ { 0xea002578, ~0x00001f00, 0x00001800 },
++ { 0xea002778, ~0x00001f00, 0x00001800 },
++ { 0xea00210c, ~0x0038000f, 0x00000005 },
++ { 0xea00230c, ~0x0038000f, 0x00000005 },
++ { 0xea00250c, ~0x0038000f, 0x00000005 },
++ { 0xea00270c, ~0x0038000f, 0x00000005 },
++};
++
++static const struct hsio_table_row hsio_xhci_shared_lpt_lp_bx[] = { /* { addr, and, or }, file-local */
++ { 0xe90025cc, ~0x00001407, 0x00001407 },
++ { 0xe90027cc, ~0x00001407, 0x00001407 },
++ { 0xe9002568, ~0x01000f3c, 0x00000a28 },
++ { 0xe9002768, ~0x01000f3c, 0x00000a28 },
++ { 0xe900242c, ~0x00000700, 0x00000100 },
++ { 0xe900262c, ~0x00000700, 0x00000100 },
++ { 0xe900256c, ~0x000000ff, 0x0000003f },
++ { 0xe900276c, ~0x000000ff, 0x0000003f },
++ { 0xe900254c, ~0x00ffff00, 0x00120500 },
++ { 0xe900274c, ~0x00ffff00, 0x00120500 },
++ { 0xe9002564, ~0x0000f000, 0x00005000 },
++ { 0xe9002764, ~0x0000f000, 0x00005000 },
++ { 0xe9002570, ~0x00000018, 0x00000000 },
++ { 0xe9002770, ~0x00000018, 0x00000000 },
++ { 0xe9002514, ~0x38000700, 0x00000100 },
++ { 0xe9002714, ~0x38000700, 0x00000100 },
++ { 0xe9002438, ~0x0000000f, 0x0000000b },
++ { 0xe9002638, ~0x0000000f, 0x0000000b },
++ { 0xe9002414, ~0x0000fe00, 0x00006600 },
++ { 0xe9002614, ~0x0000fe00, 0x00006600 },
++ { 0xe9002540, ~0x00800000, 0x00000000 },
++ { 0xe9002740, ~0x00800000, 0x00000000 },
++};
++
++static const struct hsio_table_row hsio_xhci_lpt_lp_bx[] = { /* { addr, and, or }, file-local */
++ { 0xe90021cc, ~0x00001407, 0x00001407 },
++ { 0xe90023cc, ~0x00001407, 0x00001407 },
++ { 0xe9002168, ~0x01000f3c, 0x00000a28 },
++ { 0xe9002368, ~0x01000f3c, 0x00000a28 },
++ { 0xe900216c, ~0x000000ff, 0x0000003f },
++ { 0xe900236c, ~0x000000ff, 0x0000003f },
++ { 0xe900214c, ~0x00ffff00, 0x00120500 },
++ { 0xe900234c, ~0x00ffff00, 0x00120500 },
++ { 0xe9002164, ~0x0000f000, 0x00005000 },
++ { 0xe9002364, ~0x0000f000, 0x00005000 },
++ { 0xe9002170, ~0x00000018, 0x00000000 },
++ { 0xe9002370, ~0x00000018, 0x00000000 },
++ { 0xe9002114, ~0x38000700, 0x00000100 },
++ { 0xe9002314, ~0x38000700, 0x00000100 },
++ { 0xe9002038, ~0x0000000f, 0x0000000b },
++ { 0xe9002238, ~0x0000000f, 0x0000000b },
++ { 0xe9002014, ~0x0000fe00, 0x00006600 },
++ { 0xe9002214, ~0x0000fe00, 0x00006600 },
++ { 0xe9002140, ~0x00800000, 0x00000000 },
++ { 0xe9002340, ~0x00800000, 0x00000000 },
++};
++
++void program_hsio_sata_lpt_lp_bx(const bool is_mobile)
++{
++	/* On LP every SATA table row goes through hsio_sata_shared_update() */
++	for (size_t row = 0; row < ARRAY_SIZE(hsio_sata_shared_lpt_lp_bx); row++)
++		hsio_sata_shared_update_row(hsio_sata_shared_lpt_lp_bx[row]);
++
++	/* The low 16 bits of each lane's register 0x90 depend on the platform type */
++	uint32_t reg90_bits;
++	if (is_mobile)
++		reg90_bits = 0x00004c5a;
++	else
++		reg90_bits = 0x00003e67;
++	hsio_sata_shared_update(0xea002090, ~0x0000ffff, reg90_bits);
++	hsio_sata_shared_update(0xea002290, ~0x0000ffff, reg90_bits);
++	hsio_sata_shared_update(0xea002490, ~0x0000ffff, reg90_bits);
++	hsio_sata_shared_update(0xea002690, ~0x0000ffff, reg90_bits);
++}
++
++void program_hsio_xhci_lpt_lp_bx(void)
++{
++	const size_t num_plain = ARRAY_SIZE(hsio_xhci_lpt_lp_bx);
++	const size_t num_shared = ARRAY_SIZE(hsio_xhci_shared_lpt_lp_bx);
++	size_t row;
++
++	/* First the table whose rows are applied unconditionally... */
++	for (row = 0; row < num_plain; row++)
++		hsio_update_row(hsio_xhci_lpt_lp_bx[row]);
++
++	/*
++	 * ...then the shared-lane table, where hsio_xhci_shared_update()
++	 * decides per row whether the write actually happens.
++	 */
++	for (row = 0; row < num_shared; row++)
++		hsio_xhci_shared_update_row(hsio_xhci_shared_lpt_lp_bx[row]);
++}
++
++void program_hsio_igbe_lpt_lp_bx(void)
++{
++ /* Update register 0x08 of the lane picked by bits 18:16 of config 0xfc, if bit 19 is set */
++ const uint32_t strpfusecfg1 = pci_read_config32(PCI_DEV(0, 0x1c, 0), 0xfc);
++ if (!(strpfusecfg1 & (1 << 19)))
++ return;
++ const uint8_t gbe_port = (strpfusecfg1 >> 16) & 0x7;
++ if (gbe_port > 5)
++ return;
++ /* Unsigned literal on purpose: `0xe900 << 16` does not fit in a signed int */
++ const uint32_t gbe_hsio_base = 0xe9000000 | (0x08 + 2 * gbe_port) << 8;
++ hsio_update(gbe_hsio_base + 0x08, ~0xf0000100, 0xe0000100);
++}
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index 38a9349220..74b4d50017 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -117,6 +117,7 @@ void pch_dmi_setup_physical_layer(void);
+ void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm);
+ void early_usb_init(void);
+ void early_thermal_init(void);
++void early_pch_init_native(int s3resume);
+
+ void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+ void usb_ehci_disable(pci_devfn_t dev);
+@@ -271,6 +272,10 @@ void mainboard_config_rcba(void);
+ #define IDE_DECODE_ENABLE (1 << 15)
+ #define IDE_TIM_SEC 0x42 /* IDE timings, secondary */
+
++#define SATA_MAP 0x90 /* Not referenced by the code added in this patch */
++#define SATA_PCS 0x92 /* Bit 15: OOB Retry Mode; low bits: SATA port enables */
++#define SATA_SCLKG 0x94 /* Low nine bits are set to 0x183 in BWG step 2 */
++
+ #define SATA_SIRI 0xa0 /* SATA Indexed Register Index */
+ #define SATA_SIRD 0xa4 /* SATA Indexed Register Data */
+ #define SATA_SP 0xd0 /* Scratchpad */
+@@ -580,6 +585,7 @@ void mainboard_config_rcba(void);
+ #define D19IR 0x3168 /* 16bit */
+ #define ACPIIRQEN 0x31e0 /* 32bit */
+ #define OIC 0x31fe /* 16bit */
++#define PRSTS 0x3310 /* 32bit */
+ #define PMSYNC_CONFIG 0x33c4 /* 32bit */
+ #define PMSYNC_CONFIG2 0x33cc /* 32bit */
+ #define SOFT_RESET_CTRL 0x38f4
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch b/resources/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch
new file mode 100644
index 00000000..6df828eb
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch
@@ -0,0 +1,407 @@
+From 46cdec8cbce15ca11ad9a49a3ee415a78f781997 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 00:26:10 +0200
+Subject: [PATCH 08/26] nb/intel/haswell: Add native raminit scaffolding
+
+Implement some scaffolding for Haswell native raminit, like bootmode
+selection, handling of MRC cache and CPU detection.
+
+Change-Id: Icd96649fa045ea7f0f32ae9bfe1e60498d93975b
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 104 ++++++++++
+ .../haswell/native_raminit/raminit_native.c | 189 +++++++++++++++++-
+ .../haswell/native_raminit/raminit_native.h | 34 ++++
+ 4 files changed, 322 insertions(+), 6 deletions(-)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/raminit_main.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/raminit_native.h
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 8cfb4fb33e..90af951c5a 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,3 +1,4 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+new file mode 100644
+index 0000000000..9b42c25b40
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -0,0 +1,104 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <console/console.h>
++#include <cpu/intel/haswell/haswell.h>
++#include <delay.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/chip.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/raminit.h>
++#include <string.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++struct task_entry {
++ enum raminit_status (*task)(struct sysinfo *);
++ bool is_enabled;
++ const char *name;
++};
++
++static const struct task_entry cold_boot[] = {
++};
++
++/* Return a generic stepping value to make stepping checks simpler */
++static enum generic_stepping get_stepping(const uint32_t cpuid)
++{
++ switch (cpuid) {
++ case CPUID_HASWELL_A0:
++ die("Haswell stepping A0 is not supported\n");
++ case CPUID_HASWELL_B0:
++ case CPUID_HASWELL_ULT_B0:
++ case CPUID_CRYSTALWELL_B0:
++ return STEPPING_B0;
++ case CPUID_HASWELL_C0:
++ case CPUID_HASWELL_ULT_C0:
++ case CPUID_CRYSTALWELL_C0:
++ return STEPPING_C0;
++ default:
++ /** TODO: Add Broadwell support someday **/
++ die("Unknown CPUID 0x%x\n", cpuid);
++ }
++}
++
++static void initialize_ctrl(struct sysinfo *ctrl)
++{
++ const struct northbridge_intel_haswell_config *cfg = config_of_soc();
++ const enum raminit_boot_mode bootmode = ctrl->bootmode;
++
++ memset(ctrl, 0, sizeof(*ctrl));
++
++ ctrl->cpu = cpu_get_cpuid();
++ ctrl->stepping = get_stepping(ctrl->cpu);
++ ctrl->dq_pins_interleaved = cfg->dq_pins_interleaved;
++ ctrl->bootmode = bootmode;
++}
++
++static enum raminit_status try_raminit(struct sysinfo *ctrl)
++{
++ const struct task_entry *const schedule = cold_boot;
++ const size_t length = ARRAY_SIZE(cold_boot);
++
++ enum raminit_status status = RAMINIT_STATUS_UNSPECIFIED_ERROR;
++
++ for (size_t i = 0; i < length; i++) {
++ const struct task_entry *const entry = &schedule[i];
++ assert(entry);
++ assert(entry->name);
++ if (!entry->is_enabled)
++ continue;
++
++ assert(entry->task);
++ printk(RAM_DEBUG, "\nExecuting raminit task %s\n", entry->name);
++ status = entry->task(ctrl);
++ printk(RAM_DEBUG, "\n");
++ if (status) {
++ printk(BIOS_ERR, "raminit failed on step %s\n", entry->name);
++ break;
++ }
++ }
++
++ return status;
++}
++
++void raminit_main(const enum raminit_boot_mode bootmode)
++{
++ /*
++ * The mighty_ctrl struct. Will happily nuke the pre-RAM stack
++ * if left unattended. Make it static and pass pointers to it.
++ */
++ static struct sysinfo mighty_ctrl;
++
++ mighty_ctrl.bootmode = bootmode;
++ initialize_ctrl(&mighty_ctrl);
++
++ /** TODO: Try more than once **/
++ enum raminit_status status = try_raminit(&mighty_ctrl);
++
++ if (status != RAMINIT_STATUS_SUCCESS)
++ die("Memory initialization was met with utmost failure and misery\n");
++
++ /** TODO: Implement the required magic **/
++ die("NATIVE RAMINIT: More Magic (tm) required.\n");
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index b6efb6b40d..0869db3902 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -1,13 +1,45 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
++#include <arch/cpu.h>
++#include <assert.h>
++#include <cbmem.h>
++#include <cf9_reset.h>
+ #include <console/console.h>
++#include <cpu/x86/msr.h>
+ #include <delay.h>
++#include <device/pci_ops.h>
++#include <mrc_cache.h>
+ #include <northbridge/intel/haswell/haswell.h>
+ #include <northbridge/intel/haswell/raminit.h>
+ #include <southbridge/intel/lynxpoint/me.h>
+ #include <southbridge/intel/lynxpoint/pch.h>
+ #include <types.h>
+
++#include "raminit_native.h"
++
++static void wait_txt_clear(void)
++{
++ const struct cpuid_result cpuid = cpuid_ext(1, 0);
++
++ /* Check if TXT is supported */
++ if (!(cpuid.ecx & BIT(6)))
++ return;
++
++ /* Some TXT public bit */
++ if (!(read32p(0xfed30010) & 1))
++ return;
++
++ /* Wait for TXT clear */
++ do {} while (!(read8p(0xfed40000) & (1 << 7)));
++}
++
++static enum raminit_boot_mode get_boot_mode(void)
++{
++ const uint16_t pmcon_2 = pci_read_config16(PCH_LPC_DEV, GEN_PMCON_2);
++ const uint16_t bitmask = GEN_PMCON_2_DISB | GEN_PMCON_2_MEM_SR;
++ return (pmcon_2 & bitmask) == bitmask ? BOOTMODE_WARM : BOOTMODE_COLD;
++}
++
+ static bool early_init_native(int s3resume)
+ {
+ printk(BIOS_DEBUG, "Starting native platform initialisation\n");
+@@ -24,6 +56,120 @@ static bool early_init_native(int s3resume)
+ return cpu_replaced;
+ }
+
++#define MRC_CACHE_VERSION 1
++
++struct mrc_data {
++ const void *buffer;
++ size_t buffer_len;
++};
++
++static void save_mrc_data(struct mrc_data *md)
++{
++ mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION, md->buffer, md->buffer_len);
++}
++
++static struct mrc_data prepare_mrc_cache(void)
++{
++ struct mrc_data md = {0};
++ md.buffer = mrc_cache_current_mmap_leak(MRC_TRAINING_DATA,
++ MRC_CACHE_VERSION,
++ &md.buffer_len);
++ return md;
++}
++
++static const char *const bm_names[] = {
++ "BOOTMODE_COLD",
++ "BOOTMODE_WARM",
++ "BOOTMODE_S3",
++ "BOOTMODE_FAST",
++};
++
++static void clear_disb(void)
++{
++ pci_and_config16(PCH_LPC_DEV, GEN_PMCON_2, ~GEN_PMCON_2_DISB);
++}
++
++static void raminit_reset(void)
++{
++ clear_disb();
++ system_reset();
++}
++
++static enum raminit_boot_mode do_actual_raminit(
++ struct mrc_data *md,
++ const bool s3resume,
++ const bool cpu_replaced,
++ const enum raminit_boot_mode orig_bootmode)
++{
++ enum raminit_boot_mode bootmode = orig_bootmode;
++
++ bool save_data_valid = md->buffer && md->buffer_len == USHRT_MAX; /** TODO: sizeof() **/
++
++ if (s3resume) {
++ if (bootmode == BOOTMODE_COLD) {
++ printk(BIOS_EMERG, "Memory may not be in self-refresh for S3 resume\n");
++ printk(BIOS_EMERG, "S3 resume and cold boot are mutually exclusive\n");
++ raminit_reset();
++ }
++ /* Only a true mad hatter would replace a CPU in S3 */
++ if (cpu_replaced) {
++ printk(BIOS_EMERG, "Oh no, CPU was replaced during S3\n");
++ /*
++ * No reason to continue, memory consistency is most likely lost
++ * and ME will probably request a reset through DID response too.
++ */
++ /** TODO: Figure out why past self commented this out **/
++ //raminit_reset();
++ }
++ bootmode = BOOTMODE_S3;
++ if (!save_data_valid) {
++ printk(BIOS_EMERG, "No training data, S3 resume is impossible\n");
++ /* Failed S3 resume, reset to come up cleanly */
++ raminit_reset();
++ }
++ }
++ if (!s3resume && cpu_replaced) {
++ printk(BIOS_NOTICE, "CPU was replaced, forcing a cold boot\n");
++ /*
++ * Looks like the ME will get angry if raminit takes too long.
++ * It will report that the CPU has been replaced on next boot.
++ * Try to continue anyway. This should not happen in most cases.
++ */
++ /** TODO: Figure out why past self commented this out **/
++ //save_data_valid = false;
++ }
++ if (bootmode == BOOTMODE_COLD) {
++ /* If possible, promote to a fast boot */
++ if (save_data_valid)
++ bootmode = BOOTMODE_FAST;
++
++ clear_disb();
++ } else if (bootmode == BOOTMODE_WARM) {
++ /* If a warm reset happened before raminit is done, force a cold boot */
++ if (mchbar_read32(SSKPD) == 0 && mchbar_read32(SSKPD + 4) == 0) {
++ printk(BIOS_NOTICE, "Warm reset occurred early in cold boot\n");
++ save_data_valid = false;
++ }
++ if (!save_data_valid)
++ bootmode = BOOTMODE_COLD;
++ }
++ assert(save_data_valid != (bootmode == BOOTMODE_COLD));
++ if (save_data_valid) {
++ printk(BIOS_INFO, "Using cached memory parameters\n");
++ die("RAMINIT: Fast boot is not yet implemented\n");
++ }
++ printk(RAM_DEBUG, "Initial bootmode: %s\n", bm_names[orig_bootmode]);
++ printk(RAM_DEBUG, "Current bootmode: %s\n", bm_names[bootmode]);
++
++ /*
++ * And now, the actual memory initialization thing.
++ */
++ printk(RAM_DEBUG, "\nStarting native raminit\n");
++ raminit_main(bootmode);
++
++ return bootmode;
++}
++
+ void perform_raminit(const int s3resume)
+ {
+ /*
+@@ -32,17 +178,48 @@ void perform_raminit(const int s3resume)
+ */
+ const bool cpu_replaced = early_init_native(s3resume);
+
+- (void)cpu_replaced;
++ wait_txt_clear();
++ wrmsr(0x2e6, (msr_t) {.lo = 0, .hi = 0});
++
++ const enum raminit_boot_mode orig_bootmode = get_boot_mode();
++
++ struct mrc_data md = prepare_mrc_cache();
++
++ const enum raminit_boot_mode bootmode =
++ do_actual_raminit(&md, s3resume, cpu_replaced, orig_bootmode);
++
++ /** TODO: report_memory_config **/
+
+- /** TODO: Move after raminit */
+ if (intel_early_me_uma_size() > 0) {
+- /** TODO: Update status once raminit is implemented **/
+- uint8_t me_status = ME_INIT_STATUS_ERROR;
++ /*
++ * The 'other' success value is to report loss of memory
++ * consistency to ME if warm boot was downgraded to cold.
++ */
++ uint8_t me_status;
++ if (BOOTMODE_WARM == orig_bootmode && BOOTMODE_COLD == bootmode)
++ me_status = ME_INIT_STATUS_SUCCESS_OTHER;
++ else
++ me_status = ME_INIT_STATUS_SUCCESS;
++
++ /** TODO: Remove this once raminit is implemented **/
++ me_status = ME_INIT_STATUS_ERROR;
+ intel_early_me_init_done(me_status);
+ }
+
++ post_code(0x3b);
++
+ intel_early_me_status();
+
+- /** TODO: Implement the required magic **/
+- die("NATIVE RAMINIT: More Magic (tm) required.\n");
++ const bool cbmem_was_initted = !cbmem_recovery(s3resume);
++ if (s3resume && !cbmem_was_initted) {
++ /* Failed S3 resume, reset to come up cleanly */
++ printk(BIOS_CRIT, "Failed to recover CBMEM in S3 resume.\n");
++ system_reset();
++ }
++
++ /* Save training data on non-S3 resumes */
++ if (!s3resume)
++ save_mrc_data(&md);
++
++ /** TODO: setup_sdram_meminfo **/
+ }
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+new file mode 100644
+index 0000000000..885f0184f4
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -0,0 +1,34 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef HASWELL_RAMINIT_NATIVE_H
++#define HASWELL_RAMINIT_NATIVE_H
++
++enum raminit_boot_mode {
++ BOOTMODE_COLD,
++ BOOTMODE_WARM,
++ BOOTMODE_S3,
++ BOOTMODE_FAST,
++};
++
++enum raminit_status {
++ RAMINIT_STATUS_SUCCESS = 0,
++ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
++};
++
++enum generic_stepping {
++ STEPPING_A0 = 1,
++ STEPPING_B0 = 2,
++ STEPPING_C0 = 3,
++};
++
++struct sysinfo {
++ enum raminit_boot_mode bootmode;
++ enum generic_stepping stepping;
++ uint32_t cpu; /* CPUID value */
++
++ bool dq_pins_interleaved;
++};
++
++void raminit_main(enum raminit_boot_mode bootmode);
++
++#endif
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch b/resources/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch
new file mode 100644
index 00000000..07525d18
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch
@@ -0,0 +1,57 @@
+From 731216aef3129ae27ad5adc7266cb8a58090c9fc Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 26 Jun 2022 10:32:12 +0200
+Subject: [PATCH 09/26] nb/intel/haswell/nri: Only do CPU replacement check on
+ cold boots
+
+CPU replacement check should only be done on cold boots.
+
+Change-Id: I98efa105f4df755b23febe12dd7b356787847852
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/raminit_native.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index 0869db3902..bd9bc8e692 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -40,15 +40,14 @@ static enum raminit_boot_mode get_boot_mode(void)
+ return (pmcon_2 & bitmask) == bitmask ? BOOTMODE_WARM : BOOTMODE_COLD;
+ }
+
+-static bool early_init_native(int s3resume)
++static bool early_init_native(enum raminit_boot_mode bootmode)
+ {
+ printk(BIOS_DEBUG, "Starting native platform initialisation\n");
+
+ intel_early_me_init();
+- /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
+- const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
++ bool cpu_replaced = bootmode == BOOTMODE_COLD && intel_early_me_cpu_replacement_check();
+
+- early_pch_init_native(s3resume);
++ early_pch_init_native(bootmode == BOOTMODE_S3);
+
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+ dmi_early_init();
+@@ -176,13 +175,13 @@ void perform_raminit(const int s3resume)
+ * See, this function's name is a lie. There are more things to
+ * do that memory initialisation, but they are relatively easy.
+ */
+- const bool cpu_replaced = early_init_native(s3resume);
++ const enum raminit_boot_mode orig_bootmode = get_boot_mode();
++
++ const bool cpu_replaced = early_init_native(s3resume ? BOOTMODE_S3 : orig_bootmode);
+
+ wait_txt_clear();
+ wrmsr(0x2e6, (msr_t) {.lo = 0, .hi = 0});
+
+- const enum raminit_boot_mode orig_bootmode = get_boot_mode();
+-
+ struct mrc_data md = prepare_mrc_cache();
+
+ const enum raminit_boot_mode bootmode =
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch b/resources/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch
new file mode 100644
index 00000000..4c2a2670
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch
@@ -0,0 +1,344 @@
+From 354969af4361bcc7dc240ef5871d169728f7f0cc Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 13:48:53 +0200
+Subject: [PATCH 10/26] haswell NRI: Collect SPD info
+
+Collect SPD data from DIMMs and memory-down, and find the common
+supported settings.
+
+Change-Id: I4e6a1408a638a463ecae37a447cfed1d6556e44a
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 57 +++++
+ .../haswell/native_raminit/spd_bitmunching.c | 206 ++++++++++++++++++
+ 4 files changed, 265 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 90af951c5a..ebf7abc6ec 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -2,3 +2,4 @@
+
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
++romstage-y += spd_bitmunching.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 9b42c25b40..2d2cfa48bb 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -20,6 +20,7 @@ struct task_entry {
+ };
+
+ static const struct task_entry cold_boot[] = {
++ { collect_spd_info, true, "PROCSPD", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 885f0184f4..1a0793947e 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -3,6 +3,15 @@
+ #ifndef HASWELL_RAMINIT_NATIVE_H
+ #define HASWELL_RAMINIT_NATIVE_H
+
++#include <device/dram/ddr3.h>
++#include <northbridge/intel/haswell/haswell.h>
++
++#define SPD_LEN 256
++
++/* 8 data lanes + 1 ECC lane */
++#define NUM_LANES 9
++#define NUM_LANES_NO_ECC 8
++
+ enum raminit_boot_mode {
+ BOOTMODE_COLD,
+ BOOTMODE_WARM,
+@@ -12,6 +21,8 @@ enum raminit_boot_mode {
+
+ enum raminit_status {
+ RAMINIT_STATUS_SUCCESS = 0,
++ RAMINIT_STATUS_NO_MEMORY_INSTALLED,
++ RAMINIT_STATUS_UNSUPPORTED_MEMORY,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -21,14 +32,60 @@ enum generic_stepping {
+ STEPPING_C0 = 3,
+ };
+
++struct raminit_dimm_info {
++ spd_raw_data raw_spd;
++ struct dimm_attr_ddr3_st data;
++ uint8_t spd_addr;
++ bool valid;
++};
++
+ struct sysinfo {
+ enum raminit_boot_mode bootmode;
+ enum generic_stepping stepping;
+ uint32_t cpu; /* CPUID value */
+
+ bool dq_pins_interleaved;
++
++ /** TODO: ECC support untested **/
++ bool is_ecc;
++
++ /**
++ * FIXME: LPDDR support is incomplete. The largest chunks are missing,
++ * but some LPDDR-specific variations in algorithms have been handled.
++ * LPDDR-specific functions have stubs which will halt upon execution.
++ */
++ bool lpddr;
++
++ struct raminit_dimm_info dimms[NUM_CHANNELS][NUM_SLOTS];
++ union dimm_flags_ddr3_st flags;
++ uint16_t cas_supported;
++
++ /* Except for tCK, everything is eventually stored in DCLKs */
++ uint32_t tCK;
++ uint32_t tAA; /* Also known as tCL */
++ uint32_t tWR;
++ uint32_t tRCD;
++ uint32_t tRRD;
++ uint32_t tRP;
++ uint32_t tRAS;
++ uint32_t tRC;
++ uint32_t tRFC;
++ uint32_t tWTR;
++ uint32_t tRTP;
++ uint32_t tFAW;
++ uint32_t tCWL;
++ uint32_t tCMD;
++
++ uint8_t lanes; /* 8 or 9 */
++ uint8_t chanmap;
++ uint8_t dpc[NUM_CHANNELS]; /* DIMMs per channel */
++ uint8_t rankmap[NUM_CHANNELS];
++ uint8_t rank_mirrored[NUM_CHANNELS];
++ uint32_t channel_size_mb[NUM_CHANNELS];
+ };
+
+ void raminit_main(enum raminit_boot_mode bootmode);
+
++enum raminit_status collect_spd_info(struct sysinfo *ctrl);
++
+ #endif
+diff --git a/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+new file mode 100644
+index 0000000000..dbe02c72d0
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+@@ -0,0 +1,206 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <cbfs.h>
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <device/dram/ddr3.h>
++#include <device/smbus_host.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/raminit.h>
++#include <string.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static const uint8_t *get_spd_data_from_cbfs(struct spd_info *spdi)
++{
++ if (!CONFIG(HAVE_SPD_IN_CBFS))
++ return NULL;
++
++ printk(RAM_DEBUG, "SPD index %u\n", spdi->spd_index);
++
++ size_t spd_file_len;
++ uint8_t *spd_file = cbfs_map("spd.bin", &spd_file_len);
++
++ if (!spd_file) {
++ printk(BIOS_ERR, "SPD data not found in CBFS\n");
++ return NULL;
++ }
++
++ if (spd_file_len < ((spdi->spd_index + 1) * SPD_LEN)) {
++ printk(BIOS_ERR, "SPD index override to 0 - old hardware?\n");
++ spdi->spd_index = 0;
++ }
++
++ if (spd_file_len < SPD_LEN) {
++ printk(BIOS_ERR, "Invalid SPD data in CBFS\n");
++ return NULL;
++ }
++
++ return spd_file + (spdi->spd_index * SPD_LEN);
++}
++
++static void get_spd_for_dimm(struct raminit_dimm_info *const dimm, const uint8_t *cbfs_spd)
++{
++ if (dimm->spd_addr == SPD_MEMORY_DOWN) {
++ if (cbfs_spd) {
++ memcpy(dimm->raw_spd, cbfs_spd, SPD_LEN);
++ dimm->valid = true;
++ printk(RAM_DEBUG, "memory-down\n");
++ return;
++ } else {
++ printk(RAM_DEBUG, "memory-down but no CBFS SPD data, ignoring\n");
++ return;
++ }
++ }
++ printk(RAM_DEBUG, "slotted ");
++ const uint8_t spd_mem_type = smbus_read_byte(dimm->spd_addr, SPD_MEMORY_TYPE);
++ if (spd_mem_type != SPD_MEMORY_TYPE_SDRAM_DDR3) {
++ printk(RAM_DEBUG, "and not DDR3, ignoring\n");
++ return;
++ }
++ printk(RAM_DEBUG, "and DDR3\n");
++ if (i2c_eeprom_read(dimm->spd_addr, 0, SPD_LEN, dimm->raw_spd) != SPD_LEN) {
++ printk(BIOS_WARNING, "I2C block read failed, trying SMBus byte reads\n");
++ for (uint32_t i = 0; i < SPD_LEN; i++)
++ dimm->raw_spd[i] = smbus_read_byte(dimm->spd_addr, i);
++ }
++ dimm->valid = true;
++}
++
++static void get_spd_data(struct sysinfo *ctrl)
++{
++ struct spd_info spdi = {0};
++ mb_get_spd_map(&spdi);
++ const uint8_t *cbfs_spd = get_spd_data_from_cbfs(&spdi);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ struct raminit_dimm_info *const dimm = &ctrl->dimms[channel][slot];
++ dimm->spd_addr = spdi.addresses[channel + channel + slot];
++ if (!dimm->spd_addr)
++ continue;
++
++ printk(RAM_DEBUG, "CH%uS%u is ", channel, slot);
++ get_spd_for_dimm(dimm, cbfs_spd);
++ }
++ }
++}
++
++static void decode_spd(struct raminit_dimm_info *const dimm)
++{
++ /** TODO: Hook up somewhere, and handle lack of XMP data **/
++ const bool enable_xmp = false;
++ memset(&dimm->data, 0, sizeof(dimm->data));
++ if (enable_xmp)
++ spd_xmp_decode_ddr3(&dimm->data, dimm->raw_spd, DDR3_XMP_PROFILE_1);
++ else
++ spd_decode_ddr3(&dimm->data, dimm->raw_spd);
++
++ if (CONFIG(DEBUG_RAM_SETUP))
++ dram_print_spd_ddr3(&dimm->data);
++}
++
++static enum raminit_status find_common_spd_parameters(struct sysinfo *ctrl)
++{
++ ctrl->cas_supported = 0xffff;
++ ctrl->flags.raw = 0xffffffff;
++
++ ctrl->tCK = 0;
++ ctrl->tAA = 0;
++ ctrl->tWR = 0;
++ ctrl->tRCD = 0;
++ ctrl->tRRD = 0;
++ ctrl->tRP = 0;
++ ctrl->tRAS = 0;
++ ctrl->tRC = 0;
++ ctrl->tRFC = 0;
++ ctrl->tWTR = 0;
++ ctrl->tRTP = 0;
++ ctrl->tFAW = 0;
++ ctrl->tCWL = 0;
++ ctrl->tCMD = 0;
++ ctrl->chanmap = 0;
++
++ bool yes_ecc = false;
++ bool not_ecc = false;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ ctrl->dpc[channel] = 0;
++ ctrl->rankmap[channel] = 0;
++ ctrl->rank_mirrored[channel] = 0;
++ ctrl->channel_size_mb[channel] = 0;
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ struct raminit_dimm_info *const dimm = &ctrl->dimms[channel][slot];
++ if (!dimm->valid)
++ continue;
++
++ printk(RAM_DEBUG, "\nCH%uS%u SPD:\n", channel, slot);
++ decode_spd(dimm);
++
++ ctrl->chanmap |= BIT(channel);
++ ctrl->dpc[channel]++;
++ ctrl->channel_size_mb[channel] += dimm->data.size_mb;
++
++ /* The first rank of a populated slot is always present */
++ const uint8_t rank = slot + slot;
++ assert(dimm->data.ranks);
++ ctrl->rankmap[channel] |= (BIT(dimm->data.ranks) - 1) << rank;
++
++ if (dimm->data.flags.pins_mirrored)
++ ctrl->rank_mirrored[channel] |= BIT(rank + 1);
++
++ /* Find common settings */
++ ctrl->cas_supported &= dimm->data.cas_supported;
++ ctrl->flags.raw &= dimm->data.flags.raw;
++ ctrl->tCK = MAX(ctrl->tCK, dimm->data.tCK);
++ ctrl->tAA = MAX(ctrl->tAA, dimm->data.tAA);
++ ctrl->tWR = MAX(ctrl->tWR, dimm->data.tWR);
++ ctrl->tRCD = MAX(ctrl->tRCD, dimm->data.tRCD);
++ ctrl->tRRD = MAX(ctrl->tRRD, dimm->data.tRRD);
++ ctrl->tRP = MAX(ctrl->tRP, dimm->data.tRP);
++ ctrl->tRAS = MAX(ctrl->tRAS, dimm->data.tRAS);
++ ctrl->tRC = MAX(ctrl->tRC, dimm->data.tRC);
++ ctrl->tRFC = MAX(ctrl->tRFC, dimm->data.tRFC);
++ ctrl->tWTR = MAX(ctrl->tWTR, dimm->data.tWTR);
++ ctrl->tRTP = MAX(ctrl->tRTP, dimm->data.tRTP);
++ ctrl->tFAW = MAX(ctrl->tFAW, dimm->data.tFAW);
++ ctrl->tCWL = MAX(ctrl->tCWL, dimm->data.tCWL);
++ ctrl->tCMD = MAX(ctrl->tCMD, dimm->data.tCMD);
++
++ yes_ecc |= dimm->data.flags.is_ecc;
++ not_ecc |= !dimm->data.flags.is_ecc;
++ }
++ }
++
++ if (!ctrl->chanmap) {
++ printk(BIOS_ERR, "No DIMMs were found\n");
++ return RAMINIT_STATUS_NO_MEMORY_INSTALLED;
++ }
++ if (!ctrl->cas_supported) {
++ printk(BIOS_ERR, "Could not resolve common CAS latency\n");
++ return RAMINIT_STATUS_UNSUPPORTED_MEMORY;
++ }
++ /** TODO: Properly handle ECC support and ECC forced **/
++ if (yes_ecc && not_ecc) {
++ /** TODO: Test if the ECC DIMMs can be operated as non-ECC DIMMs **/
++ printk(BIOS_ERR, "Both ECC and non-ECC DIMMs present, this is unsupported\n");
++ return RAMINIT_STATUS_UNSUPPORTED_MEMORY;
++ }
++ if (yes_ecc)
++ ctrl->lanes = NUM_LANES;
++ else
++ ctrl->lanes = NUM_LANES_NO_ECC;
++
++ ctrl->is_ecc = yes_ecc;
++
++ /** TODO: Complete LPDDR support **/
++ ctrl->lpddr = false;
++
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++enum raminit_status collect_spd_info(struct sysinfo *ctrl)
++{
++ get_spd_data(ctrl);
++ return find_common_spd_parameters(ctrl);
++}
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch b/resources/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch
new file mode 100644
index 00000000..1fec2e38
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch
@@ -0,0 +1,346 @@
+From 77a89d55ab7a715dc20c34a6edacaaf781b56087 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 14:36:10 +0200
+Subject: [PATCH 11/26] haswell NRI: Initialise MPLL
+
+Add code to initialise the MPLL (Memory PLL). The procedure is similar
+to the one for Sandy/Ivy Bridge, but it is not worth factoring out.
+
+Change-Id: I978c352de68f6d8cecc76f4ae3c12daaf4be9ed6
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 2 +
+ .../intel/haswell/native_raminit/init_mpll.c | 210 ++++++++++++++++++
+ .../haswell/native_raminit/io_comp_control.c | 22 ++
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 11 +
+ .../intel/haswell/registers/mchbar.h | 3 +
+ 6 files changed, 249 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/init_mpll.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index ebf7abc6ec..c125d84f0b 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,5 +1,7 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += init_mpll.c
++romstage-y += io_comp_control.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+ romstage-y += spd_bitmunching.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/init_mpll.c b/src/northbridge/intel/haswell/native_raminit/init_mpll.c
+new file mode 100644
+index 0000000000..2faa183724
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/init_mpll.c
+@@ -0,0 +1,210 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static uint32_t get_mem_multiplier(const struct sysinfo *ctrl)
++{
++ const uint32_t mult = NS2MHZ_DIV256 / (ctrl->tCK * ctrl->base_freq);
++
++ if (ctrl->base_freq == 100)
++ return clamp_u32(7, mult, 12);
++
++ if (ctrl->base_freq == 133)
++ return clamp_u32(3, mult, 10);
++
++ die("Unsupported base frequency\n");
++}
++
++static void normalize_tck(struct sysinfo *ctrl, const bool pll_ref100)
++{
++ /** TODO: Haswell supports up to DDR3-2600 **/
++ if (ctrl->tCK <= TCK_1200MHZ) {
++ ctrl->tCK = TCK_1200MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 1200;
++
++ } else if (ctrl->tCK <= TCK_1100MHZ) {
++ ctrl->tCK = TCK_1100MHZ;
++ ctrl->base_freq = 100;
++ ctrl->mem_clock_mhz = 1100;
++
++ } else if (ctrl->tCK <= TCK_1066MHZ) {
++ ctrl->tCK = TCK_1066MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 1066;
++
++ } else if (ctrl->tCK <= TCK_1000MHZ) {
++ ctrl->tCK = TCK_1000MHZ;
++ ctrl->base_freq = 100;
++ ctrl->mem_clock_mhz = 1000;
++
++ } else if (ctrl->tCK <= TCK_933MHZ) {
++ ctrl->tCK = TCK_933MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 933;
++
++ } else if (ctrl->tCK <= TCK_900MHZ) {
++ ctrl->tCK = TCK_900MHZ;
++ ctrl->base_freq = 100;
++ ctrl->mem_clock_mhz = 900;
++
++ } else if (ctrl->tCK <= TCK_800MHZ) {
++ ctrl->tCK = TCK_800MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 800;
++
++ } else if (ctrl->tCK <= TCK_700MHZ) {
++ ctrl->tCK = TCK_700MHZ;
++ ctrl->base_freq = 100;
++ ctrl->mem_clock_mhz = 700;
++
++ } else if (ctrl->tCK <= TCK_666MHZ) {
++ ctrl->tCK = TCK_666MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 666;
++
++ } else if (ctrl->tCK <= TCK_533MHZ) {
++ ctrl->tCK = TCK_533MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 533;
++
++ } else if (ctrl->tCK <= TCK_400MHZ) {
++ ctrl->tCK = TCK_400MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 400;
++
++ } else {
++ ctrl->tCK = 0;
++ ctrl->base_freq = 1;
++ ctrl->mem_clock_mhz = 0;
++ return;
++ }
++ if (!pll_ref100 && ctrl->base_freq == 100) {
++ /* Skip unsupported frequency */
++ ctrl->tCK++;
++ normalize_tck(ctrl, pll_ref100);
++ }
++}
++
++#define MIN_CAS 4
++#define MAX_CAS 24
++
++static uint8_t find_compatible_cas(struct sysinfo *ctrl)
++{
++ printk(RAM_DEBUG, "With tCK %u, try CAS: ", ctrl->tCK);
++ const uint8_t cas_lower = MAX(MIN_CAS, DIV_ROUND_UP(ctrl->tAA, ctrl->tCK));
++ const uint8_t cas_upper = MIN(MAX_CAS, 19); /* JEDEC MR0 limit */
++
++ if (!(ctrl->cas_supported >> (cas_lower - MIN_CAS))) {
++ printk(RAM_DEBUG, "DIMMs do not support CAS >= %u\n", cas_lower);
++ ctrl->tCK++;
++ return 0;
++ }
++ for (uint8_t cas = cas_lower; cas <= cas_upper; cas++) {
++ printk(RAM_DEBUG, "%u ", cas);
++ if (ctrl->cas_supported & BIT(cas - MIN_CAS)) {
++ printk(RAM_DEBUG, "OK\n");
++ return cas;
++ }
++ }
++ return 0;
++}
++
++/*
++ * Search for a usable tCK / CAS latency combination.
++ *
++ * Each pass normalises ctrl->tCK and asks find_compatible_cas() for a CAS
++ * latency. If none is found, tCK is incremented (a longer clock period) and
++ * the search repeats. On success, ctrl->multiplier is updated.
++ *
++ * Returns RAMINIT_STATUS_MPLL_INIT_FAILURE if tCK is zero after being
++ * normalised, RAMINIT_STATUS_SUCCESS otherwise.
++ */
++static enum raminit_status find_cas_tck(struct sysinfo *ctrl)
++{
++ /** TODO: Honor all possible PLL_REF100_CFG values **/
++ /* PLL_REF100_CFG is taken from bits 23:21 of CAPID0_B */
++ uint8_t pll_ref100 = (pci_read_config32(HOST_BRIDGE, CAPID0_B) >> 21) & 0x7;
++ printk(RAM_DEBUG, "PLL_REF100_CFG value: 0x%x\n", pll_ref100);
++ printk(RAM_DEBUG, "100MHz reference clock support: %s\n", pll_ref100 ? "yes" : "no");
++
++ uint8_t selected_cas;
++ while (true) {
++ /* Round tCK up so that it is a multiple of either 133 or 100 MHz */
++ normalize_tck(ctrl, pll_ref100);
++ /* A zero tCK after normalisation means no supported frequency is left */
++ if (!ctrl->tCK) {
++ printk(BIOS_ERR, "Couldn't find compatible clock / CAS settings\n");
++ return RAMINIT_STATUS_MPLL_INIT_FAILURE;
++ }
++ selected_cas = find_compatible_cas(ctrl);
++ if (selected_cas)
++ break;
++
++ /* No CAS at this speed: move on to the next longer clock period */
++ ctrl->tCK++;
++ }
++ printk(BIOS_DEBUG, "Found compatible clock / CAS settings\n");
++ printk(BIOS_DEBUG, "Selected DRAM frequency: %u MHz\n", NS2MHZ_DIV256 / ctrl->tCK);
++ printk(BIOS_DEBUG, "Selected CAS latency : %uT\n", selected_cas);
++ ctrl->multiplier = get_mem_multiplier(ctrl);
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++/*
++ * Select a memory clock and program the MPLL through MC_BIOS_REQ.
++ *
++ * tCK is first capped at TCK_400MHZ. Then, in a loop: if ctrl->qclkps is
++ * still zero, find_cas_tck() picks tCK / CAS / multiplier; the request is
++ * written to MC_BIOS_REQ and polled; finally the low nibble of MC_BIOS_DATA
++ * is compared against the requested multiplier. If it is lower, tCK is
++ * incremented and the whole sequence is retried.
++ *
++ * On success mem_clock_fs and qclkps are derived from mem_clock_mhz and the
++ * result of wait_for_first_rcomp() is returned. Any failure status from
++ * find_cas_tck() is passed through unchanged.
++ *
++ * NOTE(review): when qclkps is already non-zero, find_cas_tck() is skipped
++ * and ctrl->multiplier never changes inside the loop, so a frequency
++ * mismatch would presumably repeat forever - confirm against callers.
++ */
++enum raminit_status initialise_mpll(struct sysinfo *ctrl)
++{
++ if (ctrl->tCK > TCK_400MHZ) {
++ printk(BIOS_ERR, "tCK is too slow. Increasing to 400 MHz as last resort\n");
++ ctrl->tCK = TCK_400MHZ;
++ }
++ while (true) {
++ if (!ctrl->qclkps) {
++ const enum raminit_status status = find_cas_tck(ctrl);
++ if (status)
++ return status;
++ }
++
++ /*
++ * Unlike previous generations, Haswell's MPLL won't shut down if the
++ * requested frequency isn't supported. But we cannot reinitialize it.
++ * Another different thing: MPLL registers are 4-bit instead of 8-bit.
++ */
++
++ /** FIXME: Obtain current clock frequency if we want to skip this **/
++ //if (mchbar_read32(MC_BIOS_DATA) != 0)
++ // break;
++
++ /* Request layout used here: multiplier in the low bits, bit 4 = 100 MHz ref */
++ uint32_t mc_bios_req = ctrl->multiplier;
++ if (ctrl->base_freq == 100) {
++ /* Use 100 MHz reference clock */
++ mc_bios_req |= BIT(4);
++ }
++ /* Bit 31 is set with the request and polled below until it reads back clear */
++ mc_bios_req |= BIT(31);
++ printk(RAM_DEBUG, "MC_BIOS_REQ = 0x%08x\n", mc_bios_req);
++ printk(BIOS_DEBUG, "MPLL busy... ");
++ mchbar_write32(MC_BIOS_REQ, mc_bios_req);
++
++ /* Poll in 1 us steps, 5001 reads at most */
++ for (unsigned int i = 0; i <= 5000; i++) {
++ if (!(mchbar_read32(MC_BIOS_REQ) & BIT(31))) {
++ printk(BIOS_DEBUG, "done in %u us\n", i);
++ break;
++ }
++ udelay(1);
++ }
++ /* A timeout is only logged; the frequency check below decides what happens */
++ if (mchbar_read32(MC_BIOS_REQ) & BIT(31))
++ printk(BIOS_DEBUG, "did not lock\n");
++
++ /* Verify locked frequency */
++ const uint32_t mc_bios_data = mchbar_read32(MC_BIOS_DATA);
++ printk(RAM_DEBUG, "MC_BIOS_DATA = 0x%08x\n", mc_bios_data);
++ if ((mc_bios_data & 0xf) >= ctrl->multiplier)
++ break;
++
++ printk(BIOS_DEBUG, "Retrying at a lower frequency\n\n");
++ ctrl->tCK++;
++ }
++ /* Also protects the division below against a zero divisor */
++ if (!ctrl->mem_clock_mhz) {
++ printk(BIOS_ERR, "Could not program MPLL frequency\n");
++ return RAMINIT_STATUS_MPLL_INIT_FAILURE;
++ }
++ printk(BIOS_DEBUG, "MPLL frequency is set to: %u MHz ", ctrl->mem_clock_mhz);
++ ctrl->mem_clock_fs = 1000000000 / ctrl->mem_clock_mhz;
++ printk(BIOS_DEBUG, "(period: %u femtoseconds)\n", ctrl->mem_clock_fs);
++ ctrl->qclkps = ctrl->mem_clock_fs / 2000;
++ printk(BIOS_DEBUG, "Quadrature clock period: %u picoseconds\n", ctrl->qclkps);
++ return wait_for_first_rcomp();
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/io_comp_control.c b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+new file mode 100644
+index 0000000000..7e96c08938
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+@@ -0,0 +1,22 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++/*
++ * Busy-wait until bit 16 of RCOMP_TIMER reads as set, for at most 2000 ms.
++ *
++ * Returns RAMINIT_STATUS_SUCCESS as soon as the bit is seen, or
++ * RAMINIT_STATUS_POLL_TIMEOUT (after logging an error) once the stopwatch
++ * has expired.
++ */
++enum raminit_status wait_for_first_rcomp(void)
++{
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 2000);
++ /* do/while: the register is read at least once, whatever the timer says */
++ do {
++ if (mchbar_read32(RCOMP_TIMER) & BIT(16))
++ return RAMINIT_STATUS_SUCCESS;
++
++ } while (!stopwatch_expired(&timer));
++ printk(BIOS_ERR, "Timed out waiting for RCOMP to complete\n");
++ return RAMINIT_STATUS_POLL_TIMEOUT;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 2d2cfa48bb..09545422c0 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -21,6 +21,7 @@ struct task_entry {
+
+ static const struct task_entry cold_boot[] = {
+ { collect_spd_info, true, "PROCSPD", },
++ { initialise_mpll, true, "INITMPLL", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 1a0793947e..a54581abc7 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -23,6 +23,8 @@ enum raminit_status {
+ RAMINIT_STATUS_SUCCESS = 0,
+ RAMINIT_STATUS_NO_MEMORY_INSTALLED,
+ RAMINIT_STATUS_UNSUPPORTED_MEMORY,
++ RAMINIT_STATUS_MPLL_INIT_FAILURE,
++ RAMINIT_STATUS_POLL_TIMEOUT,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -82,10 +84,19 @@ struct sysinfo {
+ uint8_t rankmap[NUM_CHANNELS];
+ uint8_t rank_mirrored[NUM_CHANNELS];
+ uint32_t channel_size_mb[NUM_CHANNELS];
++
++ uint8_t base_freq; /* Memory base frequency, either 100 or 133 MHz */
++ uint32_t multiplier;
++ uint32_t mem_clock_mhz;
++ uint32_t mem_clock_fs; /* Memory clock period in femtoseconds */
++ uint32_t qclkps; /* Quadrature clock period in picoseconds */
+ };
+
+ void raminit_main(enum raminit_boot_mode bootmode);
+
+ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
++enum raminit_status initialise_mpll(struct sysinfo *ctrl);
++
++enum raminit_status wait_for_first_rcomp(void);
+
+ #endif
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 5610e7089a..45f8174995 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -13,6 +13,8 @@
+ #define MC_INIT_STATE_G 0x5030
+ #define MRC_REVISION 0x5034 /* MRC Revision */
+
++#define RCOMP_TIMER 0x5084
++
+ #define MC_LOCK 0x50fc /* Memory Controller Lock register */
+
+ #define GFXVTBAR 0x5400 /* Base address for IGD */
+@@ -61,6 +63,7 @@
+
+ #define BIOS_RESET_CPL 0x5da8 /* 8-bit */
+
++#define MC_BIOS_REQ 0x5e00 /* Memory frequency request register */
+ #define MC_BIOS_DATA 0x5e04 /* Miscellaneous information for BIOS */
+ #define SAPMCTL 0x5f00
+
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch b/resources/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch
new file mode 100644
index 00000000..e38f8e57
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch
@@ -0,0 +1,249 @@
+From faabed9ca8974b2e7192c55b59a9d28d75e72df6 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 16:29:55 +0200
+Subject: [PATCH 12/26] haswell NRI: Post-process selected timings
+
+Once the MPLL has been initialised, convert the timings from the SPD to
+be in DCLKs, which is what the hardware expects. In addition, calculate
+the values for tREFI and tXP.
+
+Change-Id: Id02caf858f75b9e08016762b3aefda282b274386
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/lookup_timings.c | 62 +++++++++++
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 8 ++
+ .../haswell/native_raminit/spd_bitmunching.c | 100 ++++++++++++++++++
+ 5 files changed, 172 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index c125d84f0b..2769e0bbb4 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,5 +1,6 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += lookup_timings.c
+ romstage-y += init_mpll.c
+ romstage-y += io_comp_control.c
+ romstage-y += raminit_main.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+new file mode 100644
+index 0000000000..038686c844
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+@@ -0,0 +1,62 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++/* One row of a frequency-indexed table, as consumed by lookup_timing() */
++struct timing_lookup {
++ uint32_t clock; /* Compared against the memory clock in MHz */
++ uint32_t value; /* Result returned when this row is selected */
++};
++
++/*
++ * Return the value of the first row in `lookup` whose clock is at least
++ * `mem_clock_mhz - 5`. The last row is never compared: it is the fallback
++ * that is returned when no earlier row matches.
++ *
++ * NOTE(review): `mem_clock_mhz - 5` is unsigned and wraps for inputs below 5,
++ * and `length - 1` wraps for an empty table; callers are presumably expected
++ * to pass a non-empty table and a realistic frequency - confirm.
++ */
++static uint32_t lookup_timing(
++ const uint32_t mem_clock_mhz,
++ const struct timing_lookup *const lookup,
++ const size_t length)
++{
++ /* Fall back to the last index */
++ size_t i;
++ for (i = 0; i < length - 1; i++) {
++ /* Account for imprecise frequency values */
++ if ((mem_clock_mhz - 5) <= lookup[i].clock)
++ break;
++ }
++ return lookup[i].value;
++}
++
++/* Clock value for the final (catch-all) row of the lookup tables */
++static const uint32_t fmax = UINT32_MAX;
++
++/*
++ * Look up a tCWL value for the given memory clock in MHz.
++ * The table yields 5 for clocks around 400 MHz, rising by one per row up to
++ * 12 for anything above the 1200 MHz row.
++ */
++uint8_t get_tCWL(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 400, 5 },
++ { 533, 6 },
++ { 666, 7 },
++ { 800, 8 },
++ { 933, 9 },
++ { 1066, 10 },
++ { 1200, 11 },
++ { fmax, 12 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++/*
++ * tREFI = 7800 ns * DDR MHz
++ * Computed as (MHz * 7800) / 1000 in integer arithmetic, so the result is
++ * truncated towards zero.
++ */
++uint32_t get_tREFI(const uint32_t mem_clock_mhz)
++{
++ return (mem_clock_mhz * 7800) / 1000;
++}
++
++/*
++ * Look up a tXP value for the given memory clock in MHz.
++ * The table yields 3 for clocks around 400 MHz, up to 8 for anything above
++ * the 1066 MHz row. Note that there is no 533 MHz row, so such clocks are
++ * matched by the 666 MHz row.
++ */
++uint32_t get_tXP(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 400, 3 },
++ { 666, 4 },
++ { 800, 5 },
++ { 933, 6 },
++ { 1066, 7 },
++ { fmax, 8 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 09545422c0..5f2be980d4 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -22,6 +22,7 @@ struct task_entry {
+ static const struct task_entry cold_boot[] = {
+ { collect_spd_info, true, "PROCSPD", },
+ { initialise_mpll, true, "INITMPLL", },
++ { convert_timings, true, "CONVTIM", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index a54581abc7..01e5ed1bd6 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -78,6 +78,9 @@ struct sysinfo {
+ uint32_t tCWL;
+ uint32_t tCMD;
+
++ uint32_t tREFI;
++ uint32_t tXP;
++
+ uint8_t lanes; /* 8 or 9 */
+ uint8_t chanmap;
+ uint8_t dpc[NUM_CHANNELS]; /* DIMMs per channel */
+@@ -96,7 +99,12 @@ void raminit_main(enum raminit_boot_mode bootmode);
+
+ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
+ enum raminit_status initialise_mpll(struct sysinfo *ctrl);
++enum raminit_status convert_timings(struct sysinfo *ctrl);
+
+ enum raminit_status wait_for_first_rcomp(void);
+
++uint8_t get_tCWL(uint32_t mem_clock_mhz);
++uint32_t get_tREFI(uint32_t mem_clock_mhz);
++uint32_t get_tXP(uint32_t mem_clock_mhz);
++
+ #endif
+diff --git a/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+index dbe02c72d0..becbea0725 100644
+--- a/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
++++ b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+@@ -204,3 +204,103 @@ enum raminit_status collect_spd_info(struct sysinfo *ctrl)
+ get_spd_data(ctrl);
+ return find_common_spd_parameters(ctrl);
+ }
++
++#define MIN_CWL 5
++#define MAX_CWL 12
++
++/*
++ * Post-process the timings gathered from the SPD once tCK is final.
++ *
++ * Except for tCK, hardware expects all timing values in DCLKs, not nanoseconds.
++ * The primary timings in `ctrl` are divided by tCK (rounding up), clamped to
++ * the hardware limits, and tREFI / tXP are looked up from mem_clock_mhz. A few
++ * values (tWR, tCWL, tCMD) are then adjusted before everything is printed.
++ *
++ * ctrl->tCK must be non-zero. Always returns RAMINIT_STATUS_SUCCESS.
++ */
++enum raminit_status convert_timings(struct sysinfo *ctrl)
++{
++	/*
++	 * Obtain all required timing values, in DCLKs.
++	 */
++
++	/* Convert primary timings from nanoseconds to DCLKs */
++	ctrl->tAA = DIV_ROUND_UP(ctrl->tAA, ctrl->tCK);
++	ctrl->tWR = DIV_ROUND_UP(ctrl->tWR, ctrl->tCK);
++	ctrl->tRCD = DIV_ROUND_UP(ctrl->tRCD, ctrl->tCK);
++	ctrl->tRRD = DIV_ROUND_UP(ctrl->tRRD, ctrl->tCK);
++	ctrl->tRP = DIV_ROUND_UP(ctrl->tRP, ctrl->tCK);
++	ctrl->tRAS = DIV_ROUND_UP(ctrl->tRAS, ctrl->tCK);
++	ctrl->tRC = DIV_ROUND_UP(ctrl->tRC, ctrl->tCK);
++	ctrl->tRFC = DIV_ROUND_UP(ctrl->tRFC, ctrl->tCK);
++	ctrl->tWTR = DIV_ROUND_UP(ctrl->tWTR, ctrl->tCK);
++	ctrl->tRTP = DIV_ROUND_UP(ctrl->tRTP, ctrl->tCK);
++	ctrl->tFAW = DIV_ROUND_UP(ctrl->tFAW, ctrl->tCK);
++	ctrl->tCWL = DIV_ROUND_UP(ctrl->tCWL, ctrl->tCK);
++	ctrl->tCMD = DIV_ROUND_UP(ctrl->tCMD, ctrl->tCK);
++
++	/* Constrain primary timings to hardware limits */
++	/** TODO: complain when clamping? **/
++	ctrl->tAA = clamp_u32(4, ctrl->tAA, 24);
++	ctrl->tWR = clamp_u32(5, ctrl->tWR, 16);
++	ctrl->tRCD = clamp_u32(4, ctrl->tRCD, 20);
++	ctrl->tRRD = clamp_u32(4, ctrl->tRRD, 65535);
++	ctrl->tRP = clamp_u32(4, ctrl->tRP, 15);
++	ctrl->tRAS = clamp_u32(10, ctrl->tRAS, 40);
++	ctrl->tRC = clamp_u32(1, ctrl->tRC, 4095);
++	ctrl->tRFC = clamp_u32(1, ctrl->tRFC, 511);
++	ctrl->tWTR = clamp_u32(4, ctrl->tWTR, 10);
++	ctrl->tRTP = clamp_u32(4, ctrl->tRTP, 15);
++	ctrl->tFAW = clamp_u32(10, ctrl->tFAW, 54);
++
++	/** TODO: Honor tREFI from XMP **/
++	ctrl->tREFI = get_tREFI(ctrl->mem_clock_mhz);
++	ctrl->tXP = get_tXP(ctrl->mem_clock_mhz);
++
++	/*
++	 * Check some values, and adjust them if necessary.
++	 */
++
++	/* If tWR cannot be written into DDR3 MR0, adjust it */
++	switch (ctrl->tWR) {
++	case 9:
++	case 11:
++	case 13:
++	case 15:
++		/* Odd values above 8 are bumped to the next even value */
++		ctrl->tWR++;
++		break;
++	default:
++		break;
++	}
++
++	/* If tCWL is not supported or unspecified, look up a reasonable default */
++	if (ctrl->tCWL < MIN_CWL || ctrl->tCWL > MAX_CWL)
++		ctrl->tCWL = get_tCWL(ctrl->mem_clock_mhz);
++
++	/*
++	 * This is needed to support ODT properly on 2DPC.
++	 * Written as an addition so that tCWL > tAA cannot wrap the unsigned
++	 * difference and force tCWL down to a bogus tAA - 4.
++	 */
++	if (ctrl->tAA > ctrl->tCWL + 4)
++		ctrl->tCWL = ctrl->tAA - 4;
++
++	/* If tCMD is invalid, use a guesstimate default */
++	if (!ctrl->tCMD) {
++		ctrl->tCMD = MAX(ctrl->dpc[0], ctrl->dpc[1]);
++		printk(RAM_DEBUG, "tCMD was zero, picking a guesstimate value\n");
++	}
++	ctrl->tCMD = clamp_u32(1, ctrl->tCMD, 3);
++
++	/*
++	 * Print final timings.
++	 */
++
++	/* tCK is special */
++	printk(BIOS_DEBUG, "Selected tCK : %u ns\n", ctrl->tCK / 256);
++
++	/* Primary timings */
++	printk(BIOS_DEBUG, "Selected tAA : %uT\n", ctrl->tAA);
++	printk(BIOS_DEBUG, "Selected tWR : %uT\n", ctrl->tWR);
++	printk(BIOS_DEBUG, "Selected tRCD : %uT\n", ctrl->tRCD);
++	printk(BIOS_DEBUG, "Selected tRRD : %uT\n", ctrl->tRRD);
++	printk(BIOS_DEBUG, "Selected tRP : %uT\n", ctrl->tRP);
++	printk(BIOS_DEBUG, "Selected tRAS : %uT\n", ctrl->tRAS);
++	printk(BIOS_DEBUG, "Selected tRC : %uT\n", ctrl->tRC);
++	printk(BIOS_DEBUG, "Selected tRFC : %uT\n", ctrl->tRFC);
++	printk(BIOS_DEBUG, "Selected tWTR : %uT\n", ctrl->tWTR);
++	printk(BIOS_DEBUG, "Selected tRTP : %uT\n", ctrl->tRTP);
++	printk(BIOS_DEBUG, "Selected tFAW : %uT\n", ctrl->tFAW);
++	printk(BIOS_DEBUG, "Selected tCWL : %uT\n", ctrl->tCWL);
++	printk(BIOS_DEBUG, "Selected tCMD : %uT\n", ctrl->tCMD);
++
++	/* Derived timings */
++	printk(BIOS_DEBUG, "Selected tREFI : %uT\n", ctrl->tREFI);
++	printk(BIOS_DEBUG, "Selected tXP : %uT\n", ctrl->tXP);
++
++	return RAMINIT_STATUS_SUCCESS;
++}
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch b/resources/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch
new file mode 100644
index 00000000..b1c33328
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch
@@ -0,0 +1,1593 @@
+From 1b0b17d85256193de825fa7ff0e04767c818f2fc Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 17:22:07 +0200
+Subject: [PATCH 13/26] haswell NRI: Configure initial MC settings
+
+Program initial memory controller settings. Many of these values will be
+adjusted later during training.
+
+Change-Id: If33846b51cb1bab5d0458fe626e13afb1bdc900e
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 2 +
+ .../haswell/native_raminit/configure_mc.c | 822 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_main.c | 2 +
+ .../haswell/native_raminit/raminit_native.h | 101 +++
+ .../haswell/native_raminit/reg_structs.h | 405 +++++++++
+ .../haswell/native_raminit/timings_refresh.c | 13 +
+ .../intel/haswell/registers/mchbar.h | 94 ++
+ 7 files changed, 1439 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/configure_mc.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/reg_structs.h
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 2769e0bbb4..fc55277a65 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,8 +1,10 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += configure_mc.c
+ romstage-y += lookup_timings.c
+ romstage-y += init_mpll.c
+ romstage-y += io_comp_control.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+ romstage-y += spd_bitmunching.c
++romstage-y += timings_refresh.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/configure_mc.c b/src/northbridge/intel/haswell/native_raminit/configure_mc.c
+new file mode 100644
+index 0000000000..2a667b075b
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/configure_mc.c
+@@ -0,0 +1,822 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <lib.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <string.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++/*
++ * Write DDR_SCRAM_MISC_CONTROL from the board/DIMM settings held in `ctrl`.
++ * Does nothing unless is_hsw_ult() reports an ULT part.
++ */
++static void program_misc_control(struct sysinfo *ctrl)
++{
++ if (!is_hsw_ult())
++ return;
++
++ /* Fields not named here are zero; CKE mappings only apply with LPDDR */
++ const union ddr_scram_misc_control_reg ddr_scram_misc_ctrl = {
++ .ddr_no_ch_interleave = !ctrl->dq_pins_interleaved,
++ .lpddr_mode = ctrl->lpddr,
++ .cke_mapping_ch0 = ctrl->lpddr ? ctrl->lpddr_cke_rank_map[0] : 0,
++ .cke_mapping_ch1 = ctrl->lpddr ? ctrl->lpddr_cke_rank_map[1] : 0,
++ };
++ mchbar_write32(DDR_SCRAM_MISC_CONTROL, ddr_scram_misc_ctrl.raw);
++}
++
++/* Store a fixed version number in the MRC_REVISION register */
++static void program_mrc_revision(void)
++{
++ mchbar_write32(MRC_REVISION, 0x01090000); /* MRC 1.9.0 Build 0 */
++}
++
++/*
++ * For every channel, program which ranks are in use into MC_INIT_STATE and
++ * into the CLK, CTL and CKE "ranks used" registers. Channels that do not
++ * exist get all three "ranks used" registers zeroed.
++ */
++static void program_ranks_used(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ mchbar_write8(MC_INIT_STATE_ch(channel), ctrl->rankmap[channel]);
++ if (!does_ch_exist(ctrl, channel)) {
++ mchbar_write32(DDR_CLK_ch_RANKS_USED(channel), 0);
++ mchbar_write32(DDR_CTL_ch_CTL_RANKS_USED(channel), 0);
++ mchbar_write32(DDR_CKE_ch_CTL_RANKS_USED(channel), 0);
++ continue;
++ }
++ /* CLK: the rank map by default */
++ uint32_t clk_ranks_used = ctrl->rankmap[channel];
++ if (ctrl->lpddr) {
++ /* With LPDDR, the clock usage goes by group instead */
++ clk_ranks_used = 0;
++ for (uint8_t group = 0; group < NUM_GROUPS; group++) {
++ if (ctrl->dq_byte_map[channel][CT_ITERATION_CLOCK][group])
++ clk_ranks_used |= BIT(group);
++ }
++ }
++ mchbar_write32(DDR_CLK_ch_RANKS_USED(channel), clk_ranks_used);
++
++ /* CTL: the rank map, plus extra bits from bit 4 upwards on ULT */
++ uint32_t ctl_ranks_used = ctrl->rankmap[channel];
++ if (is_hsw_ult()) {
++ /* Set ODT disable bits */
++ /** TODO: May need to do this after JEDEC reset/init **/
++ if (ctrl->lpddr && ctrl->lpddr_dram_odt)
++ ctl_ranks_used |= 2 << 4; /* ODT is used on rank 0 */
++ else
++ ctl_ranks_used |= 3 << 4;
++ }
++ mchbar_write32(DDR_CTL_ch_CTL_RANKS_USED(channel), ctl_ranks_used);
++
++ /* CKE: the rank map by default */
++ uint32_t cke_ranks_used = ctrl->rankmap[channel];
++ if (ctrl->lpddr) {
++ /* Use CKE-to-rank mapping for LPDDR */
++ const uint8_t cke_rank_map = ctrl->lpddr_cke_rank_map[channel];
++ cke_ranks_used = 0;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ /* ULT only has 2 ranks per channel */
++ if (rank >= 2)
++ break;
++
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /* Bit `cke` of the map names the rank (0 or 1) driven by that CKE */
++ for (uint8_t cke = 0; cke < 4; cke++) {
++ if (rank == ((cke_rank_map >> cke) & 1))
++ cke_ranks_used |= BIT(cke);
++ }
++ }
++ }
++ mchbar_write32(DDR_CKE_ch_CTL_RANKS_USED(channel), cke_ranks_used);
++ }
++}
++
++/*
++ * RX bias lookup tables used by get_rx_bias(), indexed as
++ * [Vdd high?][frequency step][low two bits of DDR_CLK_CB_STATUS].
++ * rxb_trad is used on non-ULT parts.
++ */
++static const uint8_t rxb_trad[2][5][4] = {
++ { /* Vdd low */
++ /* 1067 MHz, 1333 MHz, 1600 MHz, 1867 MHz, 2133 MHz, */
++ {4, 3, 3, 2}, {4, 4, 3, 2}, {5, 4, 3, 3}, {5, 4, 4, 3}, {5, 4, 4, 3},
++ },
++ { /* Vdd hi */
++ /* 1067 MHz, 1333 MHz, 1600 MHz, 1867 MHz, 2133 MHz, */
++ {4, 3, 3, 2}, {4, 4, 3, 2}, {5, 4, 3, 3}, {5, 4, 4, 3}, {4, 4, 3, 3},
++ },
++};
++
++/* Same layout as rxb_trad, with fewer frequency steps; used on ULT parts */
++static const uint8_t rxb_ultx[2][3][4] = {
++ { /* Vdd low */
++ /* 1067 MHz, 1333 MHz, 1600 MHz, */
++ {5, 6, 6, 5}, {5, 6, 6, 5}, {4, 6, 6, 6},
++ },
++ { /* Vdd hi */
++ /* 1067 MHz, 1333 MHz, 1600 MHz, */
++ {7, 6, 6, 5}, {7, 6, 6, 5}, {7, 6, 6, 6},
++ },
++};
++
++/*
++ * Return the RX bias setting for the current configuration, taken from
++ * rxb_ultx (ULT) or rxb_trad (otherwise).
++ *
++ * The table is indexed by whether vdd_mv exceeds 1350, by a frequency step
++ * derived from the multiplier, and by the low two bits of DDR_CLK_CB_STATUS.
++ */
++uint8_t get_rx_bias(const struct sysinfo *ctrl)
++{
++ const bool is_ult = is_hsw_ult();
++ const bool vddhi = ctrl->vdd_mv > 1350;
++ const uint8_t max_rxf = is_ult ? ARRAY_SIZE(rxb_ultx[0]) : ARRAY_SIZE(rxb_trad[0]);
++ /* Multiplier that maps to frequency step 0 for the reference clock in use */
++ const uint8_t ref_clk = ctrl->base_freq == 133 ? 4 : 6;
++ /* Clamp so that out-of-range multipliers use the first or last column */
++ const uint8_t rx_f = clamp_s8(0, ctrl->multiplier - ref_clk, max_rxf - 1);
++ const uint8_t rx_cb = mchbar_read32(DDR_CLK_CB_STATUS) & 0x3;
++ if (is_ult)
++ return rxb_ultx[vddhi][rx_f][rx_cb];
++ else
++ return rxb_trad[vddhi][rx_f][rx_cb];
++}
++
++/*
++ * Program initial values for the DDR data registers and mirror them into the
++ * per-channel / per-rank / per-byte bookkeeping arrays in `ctrl`.
++ *
++ * dis_odt_static and vddhi are copied into DDR_DATA_CONTROL_0.
++ */
++static void program_ddr_data(struct sysinfo *ctrl, const bool dis_odt_static, const bool vddhi)
++{
++ const bool is_ult = is_hsw_ult();
++
++ /* Per-rank RX/TX training defaults, written once per existing rank */
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!does_rank_exist(ctrl, rank))
++ continue;
++
++ const union ddr_data_rx_train_rank_reg rx_train = {
++ .rcven = 64,
++ .dqs_p = 32,
++ .dqs_n = 32,
++ };
++ mchbar_write32(DDR_DATA_RX_TRAIN_RANK(rank), rx_train.raw);
++ mchbar_write32(DDR_DATA_RX_PER_BIT_RANK(rank), 0x88888888);
++
++ const union ddr_data_tx_train_rank_reg tx_train = {
++ .tx_eq = TXEQFULLDRV | 11,
++ .dq_delay = 96,
++ .dqs_delay = 64,
++ };
++ mchbar_write32(DDR_DATA_TX_TRAIN_RANK(rank), tx_train.raw);
++ mchbar_write32(DDR_DATA_TX_PER_BIT_RANK(rank), 0x88888888);
++
++ /* Remember what was programmed, for every channel and byte lane */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ ctrl->tx_dq[channel][rank][byte] = tx_train.dq_delay;
++ ctrl->txdqs[channel][rank][byte] = tx_train.dqs_delay;
++ ctrl->tx_eq[channel][rank][byte] = tx_train.tx_eq;
++
++ ctrl->rcven[channel][rank][byte] = rx_train.rcven;
++ ctrl->rxdqsp[channel][rank][byte] = rx_train.dqs_p;
++ ctrl->rxdqsn[channel][rank][byte] = rx_train.dqs_n;
++ ctrl->rx_eq[channel][rank][byte] = rx_train.rx_eq;
++ }
++ }
++ }
++ mchbar_write32(DDR_DATA_TX_XTALK, 0);
++ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0x88888888);
++ mchbar_write32(DDR_DATA_OFFSET_TRAIN, 0);
++ mchbar_write32(DDR_DATA_OFFSET_COMP, 0);
++
++ const union ddr_data_control_0_reg data_control_0 = {
++ .internal_clocks_on = !is_ult,
++ .data_vccddq_hi = vddhi,
++ .disable_odt_static = dis_odt_static,
++ .lpddr_mode = ctrl->lpddr,
++ .odt_samp_extend_en = ctrl->lpddr,
++ .early_rleak_en = ctrl->lpddr && ctrl->stepping >= STEPPING_C0,
++ };
++ mchbar_write32(DDR_DATA_CONTROL_0, data_control_0.raw);
++
++ /* NOTE(review): the -2 values presumably land in signed bitfields - confirm */
++ const union ddr_data_control_1_reg data_control_1 = {
++ .dll_mask = 1,
++ .rx_bias_ctl = get_rx_bias(ctrl),
++ .odt_delay = -2,
++ .odt_duration = 7,
++ .sense_amp_delay = -2,
++ .sense_amp_duration = 7,
++ };
++ mchbar_write32(DDR_DATA_CONTROL_1, data_control_1.raw);
++
++ clear_data_offset_train_all(ctrl);
++
++ /* Stagger byte turn-on to reduce dI/dT */
++ const uint8_t byte_stagger[] = { 0, 4, 1, 5, 2, 6, 3, 7, 8 };
++ const uint8_t latency = 2 * ctrl->tAA - 6;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ union ddr_data_control_2_reg data_control_2 = {
++ .raw = 0,
++ };
++ if (is_ult) {
++ data_control_2.rx_dqs_amp_offset = 8;
++ data_control_2.rx_clk_stg_num = 0x1f;
++ data_control_2.leaker_comp = ctrl->lpddr ? 3 : 0;
++ }
++ /* Only rx_stagger_ctl differs from one byte lane to the next */
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const uint8_t stg = latency * byte_stagger[byte] / ctrl->lanes;
++ data_control_2.rx_stagger_ctl = stg & 0x1f;
++ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
++ ctrl->data_offset_comp[channel][byte] = 0;
++ ctrl->dq_control_1[channel][byte] = data_control_1.raw;
++ ctrl->dq_control_2[channel][byte] = data_control_2.raw;
++ }
++ ctrl->dq_control_0[channel] = data_control_0.raw;
++ }
++}
++
++/*
++ * Build the VssHi control value for a target of `vsshi_mv` millivolts
++ * (scaled against ctrl->vdd_mv) and write it to two registers: a raw MCHBAR
++ * offset that depends on ULT vs. non-ULT, and DDR_COMP_VSSHI_CONTROL.
++ */
++static void program_vsshi_control(struct sysinfo *ctrl, const uint16_t vsshi_mv)
++{
++ /* NOTE(review): these raw offsets have no named macro visible here */
++ const uint32_t vsshi_control_reg = is_hsw_ult() ? 0x366c : 0x306c;
++ const union ddr_comp_vsshi_control_reg ddr_vsshi_control = {
++ .vsshi_target = (vsshi_mv * 192) / ctrl->vdd_mv - 20,
++ .hi_bw_divider = 1,
++ .lo_bw_divider = 1,
++ .bw_error = 2,
++ .panic_driver_en = 1,
++ .panic_voltage = 24 / 8, /* Voltage in 8mV steps */
++ .gain_boost = 1,
++ };
++ mchbar_write32(vsshi_control_reg, ddr_vsshi_control.raw);
++ mchbar_write32(DDR_COMP_VSSHI_CONTROL, ddr_vsshi_control.raw);
++}
++
++/*
++ * Find the pair of slope codes whose summed weights come closest to `slope`.
++ *
++ * Each code 0-7 has a weight given by `coding`. All pairs with a >= b are
++ * tried, and the pair with the smallest absolute error is stored through
++ * best_a / best_b. Both default to 0 (weight 0), whose error is `slope`
++ * itself; a pair only replaces the current best if it is strictly better.
++ */
++static void calc_vt_slope_code(const uint16_t slope, uint8_t *best_a, uint8_t *best_b)
++{
++ const int16_t coding[] = {0, -125, -62, -31, 250, 125, 62, 31};
++ *best_a = 0;
++ *best_b = 0;
++ int16_t best_err = slope;
++ for (uint8_t b = 0; b < ARRAY_SIZE(coding); b++) {
++ /* Start at a = b: the sum is symmetric, so unordered pairs suffice */
++ for (uint8_t a = b; a < ARRAY_SIZE(coding); a++) {
++ int16_t error = slope - (coding[a] + coding[b]);
++ if (error < 0)
++ error = -error;
++
++ if (error < best_err) {
++ best_err = error;
++ *best_a = a;
++ *best_b = b;
++ }
++ }
++ }
++}
++
++/*
++ * Program the DIMM Vref control and adjust registers.
++ *
++ * ULT parts use fixed slope codes; other parts derive them from the ratio of
++ * vccio_mv to ctrl->vdd_mv via calc_vt_slope_code(). The adjust register
++ * value is also saved in ctrl->dimm_vref.
++ */
++static void program_dimm_vref(struct sysinfo *ctrl, const uint16_t vccio_mv, const bool vddhi)
++{
++ const bool is_ult = is_hsw_ult();
++
++ /* Static values for ULT */
++ uint8_t vt_slope_a = 4;
++ uint8_t vt_slope_b = 0;
++ if (!is_ult) {
++ /* On non-ULT, compute best slope code */
++ const uint16_t vt_slope = 1500 * vccio_mv / ctrl->vdd_mv - 1000;
++ calc_vt_slope_code(vt_slope, &vt_slope_a, &vt_slope_b);
++ }
++ const union ddr_data_vref_control_reg ddr_vref_control = {
++ .hi_bw_divider = is_ult ? 0 : 3,
++ .lo_bw_divider = 3,
++ .sample_divider = is_ult ? 1 : 3,
++ .slow_bw_error = 1,
++ .hi_bw_enable = 1,
++ .vt_slope_b = vt_slope_b,
++ .vt_slope_a = vt_slope_a,
++ .vt_offset = 0,
++ };
++ /* The register offset differs between ULT and non-ULT parts */
++ mchbar_write32(is_ult ? 0xf68 : 0xf6c, ddr_vref_control.raw); /* Use CH1 byte 7 */
++
++ const union ddr_data_vref_adjust_reg ddr_vref_adjust = {
++ .en_dimm_vref_ca = 1,
++ .en_dimm_vref_ch0 = 1,
++ .en_dimm_vref_ch1 = 1,
++ .vccddq_hi_qnnn_h = vddhi,
++ .hi_z_timer_ctrl = 3,
++ };
++ ctrl->dimm_vref = ddr_vref_adjust;
++ mchbar_write32(DDR_DATA_VREF_ADJUST, ddr_vref_adjust.raw);
++}
++
++/*
++ * Replicate `code` into four fields placed 7 bits apart (bit offsets 0, 7,
++ * 14 and 21). The fields only stay separate if `code` fits in 7 bits; callers
++ * in this file pass 64 or 96.
++ */
++static uint32_t pi_code(const uint32_t code)
++{
++ return code << 21 | code << 14 | code << 7 | code << 0;
++}
++
++/*
++ * For every existing channel, program initial CLK / CMD / CKE / CTL control
++ * registers, PI codes and comp offsets, and record the PI codes in `ctrl`.
++ *
++ * vddhi is copied into the vccddq_hi field of each control register.
++ */
++static void program_ddr_ca(struct sysinfo *ctrl, const bool vddhi)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ const union ddr_clk_controls_reg ddr_clk_controls = {
++ .dll_mask = 1,
++ .vccddq_hi = vddhi,
++ .lpddr_mode = ctrl->lpddr,
++ };
++ mchbar_write32(DDR_CLK_ch_CONTROLS(channel), ddr_clk_controls.raw);
++
++ const union ddr_cmd_controls_reg ddr_cmd_controls = {
++ .dll_mask = 1,
++ .vccddq_hi = vddhi,
++ .lpddr_mode = ctrl->lpddr,
++ .early_weak_drive = 3,
++ .cmd_tx_eq = 1,
++ };
++ mchbar_write32(DDR_CMD_ch_CONTROLS(channel), ddr_cmd_controls.raw);
++
++ const union ddr_cke_ctl_controls_reg ddr_cke_controls = {
++ .dll_mask = 1,
++ .vccddq_hi = vddhi,
++ .lpddr_mode = ctrl->lpddr,
++ .early_weak_drive = 3,
++ .cmd_tx_eq = 1,
++ .ctl_tx_eq = 1,
++ .ctl_sr_drv = 2,
++ };
++ mchbar_write32(DDR_CKE_ch_CTL_CONTROLS(channel), ddr_cke_controls.raw);
++
++ /* The CTL register uses the same union type as the CKE register above */
++ const union ddr_cke_ctl_controls_reg ddr_ctl_controls = {
++ .dll_mask = 1,
++ .vccddq_hi = vddhi,
++ .lpddr_mode = ctrl->lpddr,
++ .ctl_tx_eq = 1,
++ .ctl_sr_drv = 2,
++ .la_drv_en_ovrd = 1, /* Must be set on ULT */
++ };
++ mchbar_write32(DDR_CTL_ch_CTL_CONTROLS(channel), ddr_ctl_controls.raw);
++
++ /* CMD PI code depends on LPDDR; CTL and CLK always start at 64 */
++ const uint8_t cmd_pi = ctrl->lpddr ? 96 : 64;
++ mchbar_write32(DDR_CMD_ch_PI_CODING(channel), pi_code(cmd_pi));
++ mchbar_write32(DDR_CKE_ch_CMD_PI_CODING(channel), pi_code(cmd_pi));
++ mchbar_write32(DDR_CKE_CTL_ch_CTL_PI_CODING(channel), pi_code(64));
++ mchbar_write32(DDR_CLK_ch_PI_CODING(channel), pi_code(64));
++
++ mchbar_write32(DDR_CMD_ch_COMP_OFFSET(channel), 0);
++ mchbar_write32(DDR_CLK_ch_COMP_OFFSET(channel), 0);
++ mchbar_write32(DDR_CKE_CTL_ch_CTL_COMP_OFFSET(channel), 0);
++
++ /* Keep the software copies in sync with what was just written */
++ for (uint8_t group = 0; group < NUM_GROUPS; group++) {
++ ctrl->cke_cmd_pi_code[channel][group] = cmd_pi;
++ ctrl->cmd_north_pi_code[channel][group] = cmd_pi;
++ ctrl->cmd_south_pi_code[channel][group] = cmd_pi;
++ }
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ ctrl->clk_pi_code[channel][rank] = 64;
++ ctrl->ctl_pi_code[channel][rank] = 64;
++ }
++ }
++}
++
++/* Indices into the rcomp_cfg[] array built by program_rcomp_vref() */
++enum {
++ RCOMP_RD_ODT = 0,
++ RCOMP_WR_DS_DQ,
++ RCOMP_WR_DS_CMD,
++ RCOMP_WR_DS_CTL,
++ RCOMP_WR_DS_CLK,
++ RCOMP_MAX_CODES,
++};
++
++/* Inputs and computed result for one Rcomp Vref calculation */
++struct rcomp_info {
++ uint8_t resistor; /* Reference resistance used in the Vref formula */
++ uint8_t sz_steps; /* Scale factor applied to the resistance ratio */
++ uint8_t target_r; /* Target resistance */
++ int8_t result; /* Clamped Vref code, filled in by program_rcomp_vref() */
++};
++
++/*
++ * Compute the Rcomp Vref codes for ODT and for the DQ / CMD / CTL / CLK
++ * drivers, then write them (plus dis_odt_static) to DDR_COMP_CTL_0. The
++ * register value is also saved in ctrl->comp_ctl_0.
++ *
++ * Each code is sz_steps * (resistor - target_r) / (2 * (resistor + target_r)),
++ * clamped to the width of the corresponding register field.
++ */
++static void program_rcomp_vref(struct sysinfo *ctrl, const bool dis_odt_static)
++{
++ const bool is_ult = is_hsw_ult();
++ /*
++ * +-------------------------------+
++ * | Rcomp resistor values in ohms |
++ * +-----------+------+------+-----+
++ * | Ball name | Trad | ULTX | Use |
++ * +-----------+------+------+-----+
++ * | SM_RCOMP0 | 100 | 200 | CMD |
++ * | SM_RCOMP1 | 75 | 120 | DQ |
++ * | SM_RCOMP2 | 100 | 100 | ODT |
++ * +-----------+------+------+-----+
++ */
++ /*
++ * NOTE(review): the .resistor values below are not the figures from the
++ * table above; how they relate is not visible here - confirm.
++ */
++ struct rcomp_info rcomp_cfg[RCOMP_MAX_CODES] = {
++ [RCOMP_RD_ODT] = {
++ .resistor = 50,
++ .sz_steps = 96,
++ .target_r = 50,
++ },
++ [RCOMP_WR_DS_DQ] = {
++ .resistor = 25,
++ .sz_steps = 64,
++ .target_r = 33,
++ },
++ [RCOMP_WR_DS_CMD] = {
++ .resistor = 20,
++ .sz_steps = 64,
++ .target_r = 20,
++ },
++ [RCOMP_WR_DS_CTL] = {
++ .resistor = 20,
++ .sz_steps = 64,
++ .target_r = 20,
++ },
++ [RCOMP_WR_DS_CLK] = {
++ .resistor = 25,
++ .sz_steps = 64,
++ .target_r = 29,
++ },
++ };
++ /* Overrides: ULT parts, or non-ULT with two DIMMs on either channel */
++ if (is_ult) {
++ rcomp_cfg[RCOMP_WR_DS_DQ].resistor = 40;
++ rcomp_cfg[RCOMP_WR_DS_DQ].target_r = 40;
++ rcomp_cfg[RCOMP_WR_DS_CLK].resistor = 40;
++ } else if (ctrl->dpc[0] == 2 || ctrl->dpc[1] == 2) {
++ rcomp_cfg[RCOMP_RD_ODT].target_r = 60;
++ }
++ for (uint8_t i = 0; i < RCOMP_MAX_CODES; i++) {
++ struct rcomp_info *const r = &rcomp_cfg[i];
++ const int32_t div = 2 * (r->resistor + r->target_r);
++ assert(div);
++ const int32_t vref = (r->sz_steps * (r->resistor - r->target_r)) / div;
++
++ /* DqOdt is 5 bits wide, the other Rcomp targets are 4 bits wide */
++ const int8_t comp_limit = i == RCOMP_RD_ODT ? 16 : 8;
++ r->result = clamp_s32(-comp_limit, vref, comp_limit - 1);
++ }
++ const union ddr_comp_ctl_0_reg ddr_comp_ctl_0 = {
++ .disable_odt_static = dis_odt_static,
++ .dq_drv_vref = rcomp_cfg[RCOMP_WR_DS_DQ].result,
++ .dq_odt_vref = rcomp_cfg[RCOMP_RD_ODT].result,
++ .cmd_drv_vref = rcomp_cfg[RCOMP_WR_DS_CMD].result,
++ .ctl_drv_vref = rcomp_cfg[RCOMP_WR_DS_CTL].result,
++ .clk_drv_vref = rcomp_cfg[RCOMP_WR_DS_CLK].result,
++ };
++ ctrl->comp_ctl_0 = ddr_comp_ctl_0;
++ mchbar_write32(DDR_COMP_CTL_0, ctrl->comp_ctl_0.raw);
++}
++
++/* Indices into the per-signal-group arrays used by program_slew_rates() */
++enum {
++ SCOMP_DQ = 0,
++ SCOMP_CMD,
++ SCOMP_CTL,
++ SCOMP_CLK,
++ SCOMP_MAX_CODES,
++};
++
++/*
++ * Compute slew rate comp codes for the DQ, CMD, CTL and CLK groups from the
++ * quadrature clock period (ctrl->qclkps) and a per-group buffer stage delay,
++ * then write them (plus vddhi) to DDR_COMP_CTL_1. The register value is also
++ * saved in ctrl->comp_ctl_1.
++ */
++static void program_slew_rates(struct sysinfo *ctrl, const bool vddhi)
++{
++ const uint8_t min_cycle_delay[SCOMP_MAX_CODES] = { 46, 70, 70, 46 };
++ uint8_t buffer_stage_delay_ps[SCOMP_MAX_CODES] = { 59, 53, 53, 53 };
++ uint16_t comp_slew_rate_codes[SCOMP_MAX_CODES];
++
++ /* CMD Slew Rate = 1.8 for 2N */
++ if (ctrl->tCMD == 2)
++ buffer_stage_delay_ps[SCOMP_CMD] = 89;
++
++ /* CMD Slew Rate = 4 V/ns for double-pumped CMD bus */
++ /* This check comes last, so with LPDDR it overrides the 2N value above */
++ if (ctrl->lpddr)
++ buffer_stage_delay_ps[SCOMP_CMD] = 63;
++
++ for (uint8_t i = 0; i < SCOMP_MAX_CODES; i++) {
++ /* Number of buffer stages that fit in one quadrature clock, at least 5 */
++ uint16_t stages = DIV_ROUND_CLOSEST(ctrl->qclkps, buffer_stage_delay_ps[i]);
++ if (stages < 5)
++ stages = 5;
++
++ bool dll_pc = buffer_stage_delay_ps[i] < min_cycle_delay[i] || stages > 16;
++
++ /* Lock DLL... */
++ if (dll_pc)
++ comp_slew_rate_codes[i] = stages / 2 - 1; /* to a phase */
++ else
++ comp_slew_rate_codes[i] = (stages - 1) | BIT(4); /* to a cycle */
++ }
++ union ddr_comp_ctl_1_reg ddr_comp_ctl_1 = {
++ .dq_scomp = comp_slew_rate_codes[SCOMP_DQ],
++ .cmd_scomp = comp_slew_rate_codes[SCOMP_CMD],
++ .ctl_scomp = comp_slew_rate_codes[SCOMP_CTL],
++ .clk_scomp = comp_slew_rate_codes[SCOMP_CLK],
++ .vccddq_hi = vddhi,
++ };
++ ctrl->comp_ctl_1 = ddr_comp_ctl_1;
++ mchbar_write32(DDR_COMP_CTL_1, ctrl->comp_ctl_1.raw);
++}
++
++static uint32_t ln_x100(const uint32_t input_x100)
++{ /* Integer approximation of 100 * ln(input_x100 / 100) */
++ uint32_t val = input_x100;
++ uint32_t ret = 0;
++ while (val > 271) { /* Divide by 2.718 until val <= 271, adding 100 per step */
++ val = (val * 1000) / 2718;
++ ret += 100;
++ }
++ return ret + (-16 * val * val + 11578 * val - 978860) / 10000; /* Wraps if val < 98 */
++}
++
++static uint32_t compute_vsshi_vref(struct sysinfo *ctrl, const uint32_t vsshi_tgt, bool up)
++{ /* Vref code for one panic driver; `up` selects the pull-up variant of the formula */
++ const uint32_t delta = 15; /* vsshi_tgt must differ from this: (vsshi_tgt - delta) is a divisor */
++ const uint32_t c_die_vsshi = 2000;
++ const uint32_t r_cmd_ref = 100 * 10;
++ const uint32_t offset = up ? 64 : 0;
++ const uint32_t ln_vsshi = ln_x100((100 * vsshi_tgt) / (vsshi_tgt - delta));
++ const uint32_t r_target = (ctrl->qclkps * 2000) / (c_die_vsshi * ln_vsshi); /* ln_vsshi != 0 */
++ const uint32_t r_dividend = 128 * (up ? r_cmd_ref : r_target);
++ return r_dividend / (r_cmd_ref + r_target) - offset; /* Unsigned: wraps if quotient < offset */
++}
++
++static void program_vsshi(struct sysinfo *ctrl, const uint16_t vccio_mv, const uint16_t vsshi)
++{ /* Program DDR_COMP_VSSHI: both panic driver Vref codes plus vt_offset and vt_slope_a */
++ const uint16_t vsshi_down = vsshi + 24; /* Panic threshold of 24 mV */
++ const uint16_t vsshi_up = vccio_mv - vsshi_down; /* Target handed to the pull-up computation */
++ const union ddr_comp_vsshi_reg ddr_comp_vsshi = {
++ .panic_drv_down_vref = compute_vsshi_vref(ctrl, vsshi_down, false),
++ .panic_drv_up_vref = compute_vsshi_vref(ctrl, vsshi_up, true),
++ .vt_offset = 128 * 450 / vccio_mv / 2, /* Integer division, left to right */
++ .vt_slope_a = 4,
++ };
++ mchbar_write32(DDR_COMP_VSSHI, ddr_comp_vsshi.raw); /* Unnamed fields are written as zero */
++}
++
++static void program_misc(struct sysinfo *ctrl)
++{ /* Set the weaklock fields of DDR_SCRAM_MISC_CONTROL and zero the scramble registers */
++ ctrl->misc_control_0.raw = mchbar_read32(DDR_SCRAM_MISC_CONTROL); /* Cached in ctrl */
++ ctrl->misc_control_0.weaklock_latency = 12;
++ ctrl->misc_control_0.wl_sleep_cycles = 5;
++ ctrl->misc_control_0.wl_wake_cycles = 2;
++ mchbar_write32(DDR_SCRAM_MISC_CONTROL, ctrl->misc_control_0.raw);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ /* Keep scrambling disabled for training */
++ mchbar_write32(DDR_SCRAMBLE_ch(channel), 0);
++ }
++}
++
++/* Read-modify-write: replace the `width`-bit field at bit `shift` of MCHBAR register `offset` */
++static void override_comp(uint32_t value, uint32_t width, uint32_t shift, uint32_t offset)
++{
++ const uint32_t mask = width < 32 ? ((uint32_t)1 << width) - 1 : 0xffffffff; /* No signed shift */
++ uint32_t reg32 = mchbar_read32(offset);
++ reg32 &= ~(mask << shift);
++ reg32 |= (value & mask) << shift; /* Oversized values must not spill into other fields */
++ mchbar_write32(offset, reg32);
++}
++
++static void program_ls_comp(struct sysinfo *ctrl)
++{ /* Force a fixed level shifter comp value, push a COMP update, then fix the ODT up/down offset */
++ /* Disable periodic COMP */
++ const union pcu_comp_reg m_comp = {
++ .comp_disable = 1,
++ .comp_interval = COMP_INT,
++ .comp_force = 1,
++ };
++ mchbar_write32(M_COMP, m_comp.raw);
++ udelay(10);
++
++ /* Override level shifter compensation */
++ const uint32_t ls_comp = 2;
++ override_comp(ls_comp, 3, 28, DDR_DATA_RCOMP_DATA_1);
++ override_comp(ls_comp, 3, 24, DDR_CMD_COMP);
++ override_comp(ls_comp, 3, 24, DDR_CKE_CTL_COMP);
++ override_comp(ls_comp, 3, 23, DDR_CLK_COMP);
++ override_comp(ls_comp, 3, 28, DDR_COMP_DATA_COMP_1);
++ override_comp(ls_comp, 3, 24, DDR_COMP_CMD_COMP);
++ override_comp(ls_comp, 4, 24, DDR_COMP_CTL_COMP); /* NOTE(review): 4 bits wide, others 3 - confirm */
++ override_comp(ls_comp, 4, 23, DDR_COMP_CLK_COMP); /* NOTE(review): 4 bits wide, others 3 - confirm */
++ override_comp(ls_comp, 3, 24, DDR_COMP_OVERRIDE);
++
++ /* Manually update the COMP values */
++ union ddr_scram_misc_control_reg ddr_scram_misc_ctrl = ctrl->misc_control_0;
++ ddr_scram_misc_ctrl.force_comp_update = 1; /* Set on a local copy only, not in ctrl */
++ mchbar_write32(DDR_SCRAM_MISC_CONTROL, ddr_scram_misc_ctrl.raw);
++
++ /* Use a fixed offset between ODT Up/Dn */
++ const union ddr_comp_data_comp_1_reg data_comp_1 = {
++ .raw = mchbar_read32(DDR_COMP_DATA_COMP_1),
++ };
++ const uint32_t odt_offset = data_comp_1.rcomp_odt_down - data_comp_1.rcomp_odt_up;
++ ctrl->comp_ctl_0.odt_up_down_off = odt_offset; /* Bit-field assignment keeps the low bits only */
++ ctrl->comp_ctl_0.fixed_odt_offset = 1;
++ mchbar_write32(DDR_COMP_CTL_0, ctrl->comp_ctl_0.raw);
++}
++
++/** TODO: Deduplicate PCODE stuff, it's already implemented in CPU code **/
++static bool pcode_ready(void)
++{ /* True once MAILBOX_RUN_BUSY reads clear; false after 100 polls spaced udelay(10) apart */
++ const unsigned int delay_step = 10;
++ for (unsigned int i = 0; i < 1000; i += delay_step) {
++ if (!(mchbar_read32(BIOS_MAILBOX_INTERFACE) & MAILBOX_RUN_BUSY))
++ return true;
++
++ udelay(delay_step);
++ }
++ return false;
++}
++
++static uint32_t pcode_mailbox_read(const uint32_t command)
++{ /* Issue `command` and return BIOS_MAILBOX_DATA; a timeout yields 0, same as a zero reply */
++ if (!pcode_ready()) {
++ printk(BIOS_ERR, "PCODE: mailbox timeout on wait ready\n");
++ return 0;
++ }
++ mchbar_write32(BIOS_MAILBOX_INTERFACE, command | MAILBOX_RUN_BUSY);
++ if (!pcode_ready()) {
++ printk(BIOS_ERR, "PCODE: mailbox timeout on completion\n");
++ return 0;
++ }
++ return mchbar_read32(BIOS_MAILBOX_DATA);
++}
++
++static int pcode_mailbox_write(const uint32_t command, const uint32_t data)
++{ /* Send `data` with `command`; returns 0 on success, -1 if either wait times out */
++ if (!pcode_ready()) {
++ printk(BIOS_ERR, "PCODE: mailbox timeout on wait ready\n");
++ return -1;
++ }
++ mchbar_write32(BIOS_MAILBOX_DATA, data); /* Data register first, then the command */
++ mchbar_write32(BIOS_MAILBOX_INTERFACE, command | MAILBOX_RUN_BUSY);
++ if (!pcode_ready()) {
++ printk(BIOS_ERR, "PCODE: mailbox timeout on completion\n");
++ return -1;
++ }
++ return 0;
++}
++
++static void enable_2x_refresh(struct sysinfo *ctrl)
++{ /* If ENABLE_DDR_2X_REFRESH is set: request 2x refresh via mailbox and/or shorten tREFI */
++ if (!CONFIG(ENABLE_DDR_2X_REFRESH))
++ return;
++
++ printk(BIOS_DEBUG, "Enabling 2x Refresh\n");
++ const bool asr = ctrl->flags.asr;
++ const bool lpddr = ctrl->lpddr;
++
++ /* ASR and LPDDR are mutually exclusive */
++ assert(!asr || !lpddr);
++ if (!asr) {
++ uint32_t reg32 = pcode_mailbox_read(MAILBOX_BIOS_CMD_READ_DDR_2X_REFRESH);
++ if (!(reg32 & BIT(31))) { /** TODO: What to do if this is locked? **/
++ reg32 |= BIT(0); /* Enable 2x refresh */
++ reg32 |= BIT(31); /* Lock */
++
++ if (lpddr)
++ reg32 |= 4 << 1; /* LPDDR MR4 1/2 tREFI */
++
++ if (pcode_mailbox_write(MAILBOX_BIOS_CMD_WRITE_DDR_2X_REFRESH, reg32))
++ printk(BIOS_ERR, "Could not enable Mailbox 2x Refresh\n");
++ }
++ if (!lpddr) /* Neither ASR nor LPDDR: the mailbox request is the only step */
++ return;
++ }
++ assert(asr || lpddr); /* Only ASR or LPDDR configurations reach the tREFI scaling */
++ uint16_t refi_reduction = 50; /* Percentage of tREFI that is kept */
++ if (lpddr) {
++ refi_reduction = 97;
++ mchbar_clrbits32(PCU_DDR_PTM_CTL, 1 << 7); /* DISABLE_DRAM_TS */
++ }
++ /** TODO: Remember why this is only done on cold boots **/
++ if (ctrl->bootmode == BOOTMODE_COLD) {
++ ctrl->tREFI *= refi_reduction;
++ ctrl->tREFI /= 100;
++ }
++}
++
++static void set_pcu_ddr_voltage(const uint16_t vdd_mv)
++{ /* Translate the DRAM voltage in mV into the value written to PCU_DDR_VOLTAGE */
++ /** TODO: Handle other voltages? **/
++ uint32_t pcu_ddr_voltage;
++ switch (vdd_mv) {
++ case 1200:
++ pcu_ddr_voltage = 3;
++ break;
++ case 1350:
++ pcu_ddr_voltage = 1;
++ break;
++ default: /* Any unlisted voltage is treated like 1500 mV */
++ case 1500:
++ pcu_ddr_voltage = 0;
++ break;
++ }
++ /* Set bits 0..2 (full 32-bit write, so every other bit is written as zero) */
++ mchbar_write32(PCU_DDR_VOLTAGE, pcu_ddr_voltage);
++}
++
++static void program_scheduler(struct sysinfo *ctrl)
++{ /* Program MCSCHEDS_CBIT and MCMNTS_SC_WDBWM, plus per-channel command rate limits on LPDDR */
++ /*
++ * ZQ calibration needs to be serialized for LPDDR3. Otherwise,
++ * the processor issues LPDDR3 ZQ calibration in parallel when
++ * exiting Package C7 or deeper. This causes problems for dual
++ * and quad die packages since all ranks share the same ZQ pin.
++ *
++ * Erratum HSM94: LPDDR3 ZQ Calibration Following Deep Package
++ * C-state Exit May Lead to Unpredictable System Behavior
++ */
++ const union mcscheds_cbit_reg mcscheds_cbit = {
++ .dis_write_gap = 1,
++ .dis_odt = is_hsw_ult() && !(ctrl->lpddr && ctrl->lpddr_dram_odt),
++ .serialize_zq = ctrl->lpddr,
++ };
++ mchbar_write32(MCSCHEDS_CBIT, mcscheds_cbit.raw);
++ mchbar_write32(MCMNTS_SC_WDBWM, 0x553c3038); /* NOTE(review): magic value, fields not named here */
++ if (ctrl->lpddr) {
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ union mcmain_command_rate_limit_reg cmd_rate_limit = {
++ .raw = mchbar_read32(COMMAND_RATE_LIMIT_ch(channel)),
++ };
++ cmd_rate_limit.enable_cmd_limit = 1; /* Other fields keep their read-back values */
++ cmd_rate_limit.cmd_rate_limit = 3;
++ mchbar_write32(COMMAND_RATE_LIMIT_ch(channel), cmd_rate_limit.raw);
++ }
++ }
++}
++
++static uint8_t biggest_channel(const struct sysinfo *const ctrl)
++{ /* Index of the channel with the larger channel_size_mb; channel 0 wins a tie */
++ _Static_assert(NUM_CHANNELS == 2, "Code assumes exactly two channels");
++ return !!(ctrl->channel_size_mb[0] < ctrl->channel_size_mb[1]);
++}
++
++static void dram_zones(struct sysinfo *ctrl)
++{ /* Program channel ordering (MAD_CHNL) and the channel B zone sizes (MAD_ZR) */
++ /** TODO: Activate channel hash here, if enabled **/
++ const uint8_t biggest = biggest_channel(ctrl);
++ const uint8_t smaller = !biggest;
++
++ /** TODO: Use stacked mode if Memory Trace is enabled **/
++ const union mad_chnl_reg mad_channel = {
++ .ch_a = biggest,
++ .ch_b = smaller,
++ .ch_c = 2, /* NOTE(review): only two channels exist; presumably the unused slot - confirm */
++ .lpddr_mode = ctrl->lpddr,
++ };
++ mchbar_write32(MAD_CHNL, mad_channel.raw);
++
++ const uint8_t channel_b_zone_size = ctrl->channel_size_mb[smaller] / 256; /* 256 MB units */
++ const union mad_zr_reg mad_zr = {
++ .ch_b_double = channel_b_zone_size * 2, /* Stored in an 8-bit field */
++ .ch_b_single = channel_b_zone_size,
++ };
++ mchbar_write32(MAD_ZR, mad_zr.raw);
++}
++
++static uint8_t biggest_dimm(const struct raminit_dimm_info *dimms)
++{ /* Slot index of the DIMM with the larger data.size_mb; slot 0 wins a tie */
++ _Static_assert(NUM_SLOTS <= 2, "Code assumes at most two DIMMs per channel.");
++ if (NUM_SLOTS == 1) /* A single slot leaves nothing to compare */
++ return 0;
++
++ return !!(dimms[0].data.size_mb < dimms[1].data.size_mb);
++}
++
++static void dram_dimm_mapping(struct sysinfo *ctrl)
++{ /* Program MAD_DIMM for every channel, with the larger DIMM of each channel as DIMM A */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel)) { /* Absent channel: only the interleave bits are set */
++ const union mad_dimm_reg mad_dimm = {
++ .rank_interleave = 1,
++ .enh_interleave = 1,
++ };
++ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
++ continue;
++ }
++ const uint8_t biggest = biggest_dimm(ctrl->dimms[channel]);
++ const uint8_t smaller = !biggest;
++ const struct dimm_attr_ddr3_st *dimm_a = &ctrl->dimms[channel][biggest].data;
++ const struct dimm_attr_ddr3_st *dimm_b = &ctrl->dimms[channel][smaller].data;
++ union mad_dimm_reg mad_dimm = {
++ .dimm_a_size = dimm_a->size_mb / 256, /* 256 MB units */
++ .dimm_b_size = dimm_b->size_mb / 256, /* 256 MB units */
++ .dimm_a_sel = biggest,
++ .dimm_a_ranks = dimm_a->ranks == 2, /* 1 when the DIMM has two ranks */
++ .dimm_b_ranks = dimm_b->ranks == 2,
++ .dimm_a_width = dimm_a->width == 16, /* 1 when the width is 16 */
++ .dimm_b_width = dimm_b->width == 16,
++ .rank_interleave = 1,
++ .enh_interleave = 1,
++ .ecc_mode = 0, /* Do not enable ECC yet */
++ };
++ if (is_hsw_ult()) /* On ULT, DIMM B takes the same width bit as DIMM A */
++ mad_dimm.dimm_b_width = mad_dimm.dimm_a_width;
++
++ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
++ if (ctrl->lpddr) /* Reached only after MAD_DIMM has already been written */
++ die("%s: Missing LPDDR support (LPDDR_MR_PARAMS)\n", __func__);
++ }
++}
++
++enum raminit_status configure_mc(struct sysinfo *ctrl)
++{ /* Run every memory controller programming step in order; always returns success */
++ const uint16_t vccio_mv = 1000; /* Hardcoded */
++ const uint16_t vsshi_mv = ctrl->vdd_mv - 950; /* Unsigned: wraps if vdd_mv is below 950 */
++ const bool dis_odt_static = is_hsw_ult(); /* Disable static ODT legs on ULT */
++ const bool vddhi = ctrl->vdd_mv > 1350; /* False at exactly 1350 mV */
++
++ program_misc_control(ctrl);
++ program_mrc_revision();
++ program_ranks_used(ctrl);
++ program_ddr_data(ctrl, dis_odt_static, vddhi);
++ program_vsshi_control(ctrl, vsshi_mv);
++ program_dimm_vref(ctrl, vccio_mv, vddhi);
++ program_ddr_ca(ctrl, vddhi);
++ program_rcomp_vref(ctrl, dis_odt_static);
++ program_slew_rates(ctrl, vddhi);
++ program_vsshi(ctrl, vccio_mv, vsshi_mv);
++ program_misc(ctrl); /* Fills ctrl->misc_control_0, which program_ls_comp() reads */
++ program_ls_comp(ctrl);
++ enable_2x_refresh(ctrl); /* May scale ctrl->tREFI */
++ set_pcu_ddr_voltage(ctrl->vdd_mv);
++ configure_timings(ctrl);
++ configure_refresh(ctrl);
++ program_scheduler(ctrl);
++ dram_zones(ctrl);
++ dram_dimm_mapping(ctrl);
++
++ return RAMINIT_STATUS_SUCCESS;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 5f2be980d4..3a773cfa19 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -23,6 +23,7 @@ static const struct task_entry cold_boot[] = {
+ { collect_spd_info, true, "PROCSPD", },
+ { initialise_mpll, true, "INITMPLL", },
+ { convert_timings, true, "CONVTIM", },
++ { configure_mc, true, "CONFMC", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+@@ -54,6 +55,7 @@ static void initialize_ctrl(struct sysinfo *ctrl)
+
+ ctrl->cpu = cpu_get_cpuid();
+ ctrl->stepping = get_stepping(ctrl->cpu);
++ ctrl->vdd_mv = is_hsw_ult() ? 1350 : 1500; /** FIXME: Hardcoded, does it matter? **/
+ ctrl->dq_pins_interleaved = cfg->dq_pins_interleaved;
+ ctrl->bootmode = bootmode;
+ }
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 01e5ed1bd6..aa86b9aa39 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -3,15 +3,40 @@
+ #ifndef HASWELL_RAMINIT_NATIVE_H
+ #define HASWELL_RAMINIT_NATIVE_H
+
++#include <assert.h>
+ #include <device/dram/ddr3.h>
+ #include <northbridge/intel/haswell/haswell.h>
++#include <string.h>
++#include <types.h>
++
++#include "reg_structs.h"
+
+ #define SPD_LEN 256
+
++/* Each channel has 4 ranks, spread across 2 slots */
++#define NUM_SLOTRANKS 4
++
++#define NUM_GROUPS 2
++
+ /* 8 data lanes + 1 ECC lane */
+ #define NUM_LANES 9
+ #define NUM_LANES_NO_ECC 8
+
++#define COMP_INT 10
++
++/* Always use 12 legs for emphasis (not trained) */
++#define TXEQFULLDRV (3 << 4)
++
++enum command_training_iteration {
++ CT_ITERATION_CLOCK = 0,
++ CT_ITERATION_CMD_NORTH,
++ CT_ITERATION_CMD_SOUTH,
++ CT_ITERATION_CKE,
++ CT_ITERATION_CTL,
++ CT_ITERATION_CMD_VREF,
++ MAX_CT_ITERATION,
++};
++
+ enum raminit_boot_mode {
+ BOOTMODE_COLD,
+ BOOTMODE_WARM,
+@@ -57,6 +82,9 @@ struct sysinfo {
+ * LPDDR-specific functions have stubs which will halt upon execution.
+ */
+ bool lpddr;
++ bool lpddr_dram_odt;
++ uint8_t lpddr_cke_rank_map[NUM_CHANNELS];
++ uint8_t dq_byte_map[NUM_CHANNELS][MAX_CT_ITERATION][2];
+
+ struct raminit_dimm_info dimms[NUM_CHANNELS][NUM_SLOTS];
+ union dimm_flags_ddr3_st flags;
+@@ -93,16 +121,89 @@ struct sysinfo {
+ uint32_t mem_clock_mhz;
+ uint32_t mem_clock_fs; /* Memory clock period in femtoseconds */
+ uint32_t qclkps; /* Quadrature clock period in picoseconds */
++
++ uint16_t vdd_mv;
++
++ union ddr_scram_misc_control_reg misc_control_0;
++
++ union ddr_comp_ctl_0_reg comp_ctl_0;
++ union ddr_comp_ctl_1_reg comp_ctl_1;
++
++ union ddr_data_vref_adjust_reg dimm_vref;
++
++ uint32_t data_offset_train[NUM_CHANNELS][NUM_LANES];
++ uint32_t data_offset_comp[NUM_CHANNELS][NUM_LANES];
++
++ uint32_t dq_control_0[NUM_CHANNELS];
++ uint32_t dq_control_1[NUM_CHANNELS][NUM_LANES];
++ uint32_t dq_control_2[NUM_CHANNELS][NUM_LANES];
++
++ uint16_t tx_dq[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint16_t txdqs[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint8_t tx_eq[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++
++ uint16_t rcven[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint8_t rx_eq[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint8_t rxdqsp[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint8_t rxdqsn[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ int8_t rxvref[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++
++ uint8_t clk_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint8_t ctl_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint8_t cke_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
++
++ uint8_t cke_cmd_pi_code[NUM_CHANNELS][NUM_GROUPS];
++ uint8_t cmd_north_pi_code[NUM_CHANNELS][NUM_GROUPS];
++ uint8_t cmd_south_pi_code[NUM_CHANNELS][NUM_GROUPS];
+ };
+
++static inline bool is_hsw_ult(void)
++{ /* Follows the INTEL_LYNXPOINT_LP config option rather than probing the hardware */
++ return CONFIG(INTEL_LYNXPOINT_LP);
++}
++
++static inline bool rank_in_mask(uint8_t rank, uint8_t rankmask)
++{ /* True if bit number `rank` is set in `rankmask` */
++ assert(rank < NUM_SLOTRANKS);
++ return !!(BIT(rank) & rankmask);
++}
++
++static inline bool does_ch_exist(const struct sysinfo *ctrl, uint8_t channel)
++{ /* A channel exists when its dpc entry is nonzero; `channel` is not range-checked */
++ return !!ctrl->dpc[channel];
++}
++
++static inline bool does_rank_exist(const struct sysinfo *ctrl, uint8_t rank)
++{ /* True if `rank` is in rankmap[0] or rankmap[1]; only these two entries are consulted */
++ return rank_in_mask(rank, ctrl->rankmap[0] | ctrl->rankmap[1]);
++}
++
++static inline bool rank_in_ch(const struct sysinfo *ctrl, uint8_t rank, uint8_t channel)
++{ /* True if `rank` is set in the rankmap of `channel` */
++ assert(channel < NUM_CHANNELS);
++ return rank_in_mask(rank, ctrl->rankmap[channel]);
++}
++
++/** TODO: Handling of data_offset_train could be improved, also coupled with reg updates **/
++static inline void clear_data_offset_train_all(struct sysinfo *ctrl)
++{ /* Zero the cached values only; no register is written here */
++ memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train));
++}
++
+ void raminit_main(enum raminit_boot_mode bootmode);
+
+ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
+ enum raminit_status initialise_mpll(struct sysinfo *ctrl);
+ enum raminit_status convert_timings(struct sysinfo *ctrl);
++enum raminit_status configure_mc(struct sysinfo *ctrl);
++
++void configure_timings(struct sysinfo *ctrl);
++void configure_refresh(struct sysinfo *ctrl);
+
+ enum raminit_status wait_for_first_rcomp(void);
+
++uint8_t get_rx_bias(const struct sysinfo *ctrl);
++
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+ uint32_t get_tREFI(uint32_t mem_clock_mhz);
+ uint32_t get_tXP(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+new file mode 100644
+index 0000000000..d11cda4b3d
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -0,0 +1,405 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef HASWELL_RAMINIT_REG_STRUCTS_H
++#define HASWELL_RAMINIT_REG_STRUCTS_H
++
++union ddr_data_rx_train_rank_reg {
++ struct __packed {
++ uint32_t rcven : 9; // Bits 8:0
++ uint32_t dqs_p : 6; // Bits 14:9
++ uint32_t rx_eq : 5; // Bits 19:15
++ uint32_t dqs_n : 6; // Bits 25:20
++ int32_t vref : 6; // Bits 31:26
++ };
++ uint32_t raw;
++};
++
++union ddr_data_tx_train_rank_reg {
++ struct __packed {
++ uint32_t dq_delay : 9; // Bits 8:0
++ uint32_t dqs_delay : 9; // Bits 17:9
++ uint32_t : 2; // Bits 19:18
++ uint32_t tx_eq : 6; // Bits 25:20
++ uint32_t : 6; // Bits 31:26
++ };
++ uint32_t raw;
++};
++
++union ddr_data_control_0_reg {
++ struct __packed {
++ uint32_t rx_training_mode : 1; // Bits 0:0
++ uint32_t wl_training_mode : 1; // Bits 1:1
++ uint32_t rl_training_mode : 1; // Bits 2:2
++ uint32_t samp_train_mode : 1; // Bits 3:3
++ uint32_t tx_on : 1; // Bits 4:4
++ uint32_t rf_on : 1; // Bits 5:5
++ uint32_t rx_pi_on : 1; // Bits 6:6
++ uint32_t tx_pi_on : 1; // Bits 7:7
++ uint32_t internal_clocks_on : 1; // Bits 8:8
++ uint32_t repeater_clocks_on : 1; // Bits 9:9
++ uint32_t tx_disable : 1; // Bits 10:10
++ uint32_t rx_disable : 1; // Bits 11:11
++ uint32_t tx_long : 1; // Bits 12:12
++ uint32_t rx_dqs_ctle : 2; // Bits 14:13
++ uint32_t rx_read_pointer : 3; // Bits 17:15
++ uint32_t driver_segment_enable : 1; // Bits 18:18
++ uint32_t data_vccddq_hi : 1; // Bits 19:19
++ uint32_t read_rf_rd : 1; // Bits 20:20
++ uint32_t read_rf_wr : 1; // Bits 21:21
++ uint32_t read_rf_rank : 2; // Bits 23:22
++ uint32_t force_odt_on : 1; // Bits 24:24
++ uint32_t odt_samp_off : 1; // Bits 25:25
++ uint32_t disable_odt_static : 1; // Bits 26:26
++ uint32_t ddr_cr_force_odt_on : 1; // Bits 27:27
++ uint32_t lpddr_mode : 1; // Bits 28:28
++ uint32_t en_read_preamble : 1; // Bits 29:29
++ uint32_t odt_samp_extend_en : 1; // Bits 30:30
++ uint32_t early_rleak_en : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_data_control_1_reg {
++ struct __packed {
++ int32_t ref_pi : 4; // Bits 3:0
++ uint32_t dll_mask : 2; // Bits 5:4
++ uint32_t dll_weaklock : 1; // Bits 6:6
++ uint32_t sdll_segment_disable : 3; // Bits 9:7
++ uint32_t rx_bias_ctl : 3; // Bits 12:10
++ int32_t odt_delay : 4; // Bits 16:13
++ uint32_t odt_duration : 3; // Bits 19:17
++ int32_t sense_amp_delay : 4; // Bits 23:20
++ uint32_t sense_amp_duration : 3; // Bits 26:24
++ uint32_t burst_end_odt_delay : 3; // Bits 29:27 *** TODO: Check Broadwell ***
++ uint32_t lpddr_long_odt_en : 1; // Bits 30:30
++ uint32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++/* NOTE: Bits 31:19 are only valid for Broadwell onwards */
++union ddr_data_control_2_reg {
++ struct __packed {
++ uint32_t rx_stagger_ctl : 5; // Bits 4:0
++ uint32_t force_bias_on : 1; // Bits 5:5
++ uint32_t force_rx_on : 1; // Bits 6:6
++ uint32_t leaker_comp : 2; // Bits 8:7
++ uint32_t rx_dqs_amp_offset : 4; // Bits 12:9
++ uint32_t rx_clk_stg_num : 5; // Bits 17:13
++ uint32_t wl_long_delay : 1; // Bits 18:18
++ uint32_t enable_vref_pwrdn : 1; // Bits 19:19
++ uint32_t ddr4_mode : 1; // Bits 20:20
++ uint32_t en_vddq_odt : 1; // Bits 21:21
++ uint32_t en_vtt_odt : 1; // Bits 22:22
++ uint32_t en_const_z_eq_tx : 1; // Bits 23:23
++ uint32_t tx_eq_dis : 1; // Bits 24:24
++ uint32_t rx_vref_prog_mfc : 1; // Bits 25:25
++ uint32_t cben : 3; // Bits 28:26
++ uint32_t tx_deskew_disable : 1; // Bits 29:29
++ uint32_t rx_deskew_disable : 1; // Bits 30:30
++ uint32_t dq_slew_dly_byp : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_data_comp_1_reg {
++ struct __packed {
++ uint32_t rcomp_odt_up : 6; // Bits 5:0
++ uint32_t : 3; // Bits 8:6
++ uint32_t rcomp_odt_down : 6; // Bits 14:9
++ uint32_t : 1; // Bits 15:15
++ uint32_t panic_drv_down : 6; // Bits 21:16
++ uint32_t panic_drv_up : 6; // Bits 27:22
++ uint32_t ls_comp : 3; // Bits 30:28
++ uint32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_ctl_0_reg {
++ struct __packed {
++ uint32_t : 3; // Bits 2:0
++ uint32_t disable_odt_static : 1; // Bits 3:3
++ uint32_t odt_up_down_off : 6; // Bits 9:4
++ uint32_t fixed_odt_offset : 1; // Bits 10:10
++ int32_t dq_drv_vref : 4; // Bits 14:11
++ int32_t dq_odt_vref : 5; // Bits 19:15
++ int32_t cmd_drv_vref : 4; // Bits 23:20
++ int32_t ctl_drv_vref : 4; // Bits 27:24
++ int32_t clk_drv_vref : 4; // Bits 31:28
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_ctl_1_reg {
++ struct __packed {
++ uint32_t dq_scomp : 5; // Bits 4:0
++ uint32_t cmd_scomp : 5; // Bits 9:5
++ uint32_t ctl_scomp : 5; // Bits 14:10
++ uint32_t clk_scomp : 5; // Bits 19:15
++ uint32_t tco_cmd_offset : 4; // Bits 23:20
++ uint32_t comp_clk_on : 1; // Bits 24:24
++ uint32_t vccddq_hi : 1; // Bits 25:25
++ uint32_t : 3; // Bits 28:26
++ uint32_t dis_quick_comp : 1; // Bits 29:29
++ uint32_t sin_step : 1; // Bits 30:30
++ uint32_t sin_step_adv : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_data_vref_adjust_reg {
++ struct __packed {
++ int32_t ca_vref_ctrl : 7; // Bits 6:0
++ int32_t ch1_vref_ctrl : 7; // Bits 13:7
++ int32_t ch0_vref_ctrl : 7; // Bits 20:14
++ uint32_t en_dimm_vref_ca : 1; // Bits 21:21
++ uint32_t en_dimm_vref_ch1 : 1; // Bits 22:22
++ uint32_t en_dimm_vref_ch0 : 1; // Bits 23:23
++ uint32_t hi_z_timer_ctrl : 2; // Bits 25:24
++ uint32_t vccddq_hi_qnnn_h : 1; // Bits 26:26
++ uint32_t : 2; // Bits 28:27
++ uint32_t ca_slow_bw : 1; // Bits 29:29
++ uint32_t ch0_slow_bw : 1; // Bits 30:30
++ uint32_t ch1_slow_bw : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_data_vref_control_reg {
++ struct __packed {
++ uint32_t hi_bw_divider : 2; // Bits 1:0
++ uint32_t lo_bw_divider : 2; // Bits 3:2
++ uint32_t sample_divider : 3; // Bits 6:4
++ uint32_t open_loop : 1; // Bits 7:7
++ uint32_t slow_bw_error : 2; // Bits 9:8
++ uint32_t hi_bw_enable : 1; // Bits 10:10
++ uint32_t : 1; // Bits 11:11
++ uint32_t vt_slope_b : 3; // Bits 14:12
++ uint32_t vt_slope_a : 3; // Bits 17:15
++ uint32_t vt_offset : 3; // Bits 20:18
++ uint32_t sel_code : 3; // Bits 23:21
++ uint32_t output_code : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_vsshi_reg {
++ struct __packed {
++ uint32_t panic_drv_down_vref : 6; // Bits 5:0
++ uint32_t panic_drv_up_vref : 6; // Bits 11:6
++ uint32_t vt_offset : 5; // Bits 16:12
++ uint32_t vt_slope_a : 3; // Bits 19:17
++ uint32_t vt_slope_b : 3; // Bits 22:20
++ uint32_t : 9; // Bits 31:23
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_vsshi_control_reg {
++ struct __packed {
++ uint32_t vsshi_target : 6; // Bits 5:0
++ uint32_t hi_bw_divider : 2; // Bits 7:6
++ uint32_t lo_bw_divider : 2; // Bits 9:8
++ uint32_t sample_divider : 3; // Bits 12:10
++ uint32_t open_loop : 1; // Bits 13:13
++ uint32_t bw_error : 2; // Bits 15:14
++ uint32_t panic_driver_en : 1; // Bits 16:16
++ uint32_t : 1; // Bits 17:17
++ uint32_t panic_voltage : 4; // Bits 21:18
++ uint32_t gain_boost : 1; // Bits 22:22
++ uint32_t sel_code : 1; // Bits 23:23
++ uint32_t output_code : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
++union ddr_clk_controls_reg {
++ struct __packed {
++ uint32_t ref_pi : 4; // Bits 3:0
++ uint32_t dll_mask : 2; // Bits 5:4
++ uint32_t : 1; // Bits 6:6
++ uint32_t tx_on : 1; // Bits 7:7
++ uint32_t internal_clocks_on : 1; // Bits 8:8
++ uint32_t repeater_clocks_on : 1; // Bits 9:9
++ uint32_t io_lb_ctl : 2; // Bits 11:10
++ uint32_t odt_mode : 1; // Bits 12:12
++ uint32_t : 8; // Bits 20:13
++ uint32_t rx_vref : 6; // Bits 26:21
++ uint32_t vccddq_hi : 1; // Bits 27:27
++ uint32_t dll_weaklock : 1; // Bits 28:28
++ uint32_t lpddr_mode : 1; // Bits 29:29
++ uint32_t : 2; // Bits 31:30
++ };
++ uint32_t raw;
++};
++
++union ddr_cmd_controls_reg {
++ struct __packed {
++ int32_t ref_pi : 4; // Bits 3:0
++ uint32_t dll_mask : 2; // Bits 5:4
++ uint32_t : 1; // Bits 6:6
++ uint32_t tx_on : 1; // Bits 7:7
++ uint32_t internal_clocks_on : 1; // Bits 8:8
++ uint32_t repeater_clocks_on : 1; // Bits 9:9
++ uint32_t io_lb_ctl : 2; // Bits 11:10
++ uint32_t odt_mode : 1; // Bits 12:12
++ uint32_t cmd_tx_eq : 2; // Bits 14:13
++ uint32_t early_weak_drive : 2; // Bits 16:15
++ uint32_t : 4; // Bits 20:17
++ int32_t rx_vref : 6; // Bits 26:21
++ uint32_t vccddq_hi : 1; // Bits 27:27
++ uint32_t dll_weaklock : 1; // Bits 28:28
++ uint32_t lpddr_mode : 1; // Bits 29:29
++ uint32_t lpddr_ca_a_dis : 1; // Bits 30:30
++ uint32_t lpddr_ca_b_dis : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++/* Same register definition for CKE and CTL fubs */
++union ddr_cke_ctl_controls_reg {
++ struct __packed {
++ int32_t ref_pi : 4; // Bits 3:0
++ uint32_t dll_mask : 2; // Bits 5:4
++ uint32_t : 1; // Bits 6:6
++ uint32_t tx_on : 1; // Bits 7:7
++ uint32_t internal_clocks_on : 1; // Bits 8:8
++ uint32_t repeater_clocks_on : 1; // Bits 9:9
++ uint32_t io_lb_ctl : 2; // Bits 11:10
++ uint32_t odt_mode : 1; // Bits 12:12
++ uint32_t cmd_tx_eq : 2; // Bits 14:13
++ uint32_t early_weak_drive : 2; // Bits 16:15
++ uint32_t ctl_tx_eq : 2; // Bits 18:17
++ uint32_t ctl_sr_drv : 2; // Bits 20:19
++ int32_t rx_vref : 6; // Bits 26:21
++ uint32_t vccddq_hi : 1; // Bits 27:27
++ uint32_t dll_weaklock : 1; // Bits 28:28
++ uint32_t lpddr_mode : 1; // Bits 29:29
++ uint32_t la_drv_en_ovrd : 1; // Bits 30:30
++ uint32_t lpddr_ca_a_dis : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_scram_misc_control_reg {
++ struct __packed {
++ uint32_t wl_wake_cycles : 2; // Bits 1:0
++ uint32_t wl_sleep_cycles : 3; // Bits 4:2
++ uint32_t force_comp_update : 1; // Bits 5:5
++ uint32_t weaklock_latency : 4; // Bits 9:6
++ uint32_t ddr_no_ch_interleave : 1; // Bits 10:10
++ uint32_t lpddr_mode : 1; // Bits 11:11
++ uint32_t cke_mapping_ch0 : 4; // Bits 15:12
++ uint32_t cke_mapping_ch1 : 4; // Bits 19:16
++ uint32_t : 12; // Bits 31:20
++ };
++ uint32_t raw;
++};
++
++union mcscheds_cbit_reg {
++ struct __packed {
++ uint32_t dis_opp_cas : 1; // Bits 0:0
++ uint32_t dis_opp_is_cas : 1; // Bits 1:1
++ uint32_t dis_opp_ras : 1; // Bits 2:2
++ uint32_t dis_opp_is_ras : 1; // Bits 3:3
++ uint32_t dis_1c_byp : 1; // Bits 4:4
++ uint32_t dis_2c_byp : 1; // Bits 5:5
++ uint32_t dis_deprd_opt : 1; // Bits 6:6
++ uint32_t dis_pt_it : 1; // Bits 7:7
++ uint32_t dis_prcnt_ring : 1; // Bits 8:8
++ uint32_t dis_prcnt_sa : 1; // Bits 9:9
++ uint32_t dis_blkr_ph : 1; // Bits 10:10
++ uint32_t dis_blkr_pe : 1; // Bits 11:11
++ uint32_t dis_blkr_pm : 1; // Bits 12:12
++ uint32_t dis_odt : 1; // Bits 13:13
++ uint32_t oe_always_off : 1; // Bits 14:14
++ uint32_t : 1; // Bits 15:15
++ uint32_t dis_aom : 1; // Bits 16:16
++ uint32_t block_rpq : 1; // Bits 17:17
++ uint32_t block_wpq : 1; // Bits 18:18
++ uint32_t invert_align : 1; // Bits 19:19
++ uint32_t dis_write_gap : 1; // Bits 20:20
++ uint32_t dis_zq : 1; // Bits 21:21
++ uint32_t dis_tt : 1; // Bits 22:22
++ uint32_t dis_opp_ref : 1; // Bits 23:23
++ uint32_t long_zq : 1; // Bits 24:24
++ uint32_t dis_srx_zq : 1; // Bits 25:25
++ uint32_t serialize_zq : 1; // Bits 26:26
++ uint32_t zq_fast_exec : 1; // Bits 27:27
++ uint32_t dis_drive_nop : 1; // Bits 28:28
++ uint32_t pres_wdb_ent : 1; // Bits 29:29
++ uint32_t dis_clk_gate : 1; // Bits 30:30
++ uint32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union mcmain_command_rate_limit_reg {
++ struct __packed {
++ uint32_t enable_cmd_limit : 1; // Bits 0:0
++ uint32_t cmd_rate_limit : 3; // Bits 3:1
++ uint32_t reset_on_command : 4; // Bits 7:4
++ uint32_t reset_delay : 4; // Bits 11:8
++ uint32_t ck_to_cke_delay : 2; // Bits 13:12
++ uint32_t : 17; // Bits 30:14
++ uint32_t init_mrw_2n_cs : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union mad_chnl_reg {
++ struct __packed {
++ uint32_t ch_a : 2; // Bits 1:0
++ uint32_t ch_b : 2; // Bits 3:2
++ uint32_t ch_c : 2; // Bits 5:4
++ uint32_t stacked_mode : 1; // Bits 6:6
++ uint32_t stkd_mode_bits : 3; // Bits 9:7
++ uint32_t lpddr_mode : 1; // Bits 10:10
++ uint32_t : 21; // Bits 31:11
++ };
++ uint32_t raw;
++};
++
++union mad_dimm_reg {
++ struct __packed {
++ uint32_t dimm_a_size : 8; // Bits 7:0
++ uint32_t dimm_b_size : 8; // Bits 15:8
++ uint32_t dimm_a_sel : 1; // Bits 16:16
++ uint32_t dimm_a_ranks : 1; // Bits 17:17
++ uint32_t dimm_b_ranks : 1; // Bits 18:18
++ uint32_t dimm_a_width : 1; // Bits 19:19
++ uint32_t dimm_b_width : 1; // Bits 20:20
++ uint32_t rank_interleave : 1; // Bits 21:21
++ uint32_t enh_interleave : 1; // Bits 22:22
++ uint32_t : 1; // Bits 23:23
++ uint32_t ecc_mode : 2; // Bits 25:24
++ uint32_t hori_mode : 1; // Bits 26:26
++ uint32_t hori_address : 3; // Bits 29:27
++ uint32_t : 2; // Bits 31:30
++ };
++ uint32_t raw;
++};
++
++union mad_zr_reg {
++ struct __packed {
++ uint32_t : 16; // Bits 15:0
++ uint32_t ch_b_double : 8; // Bits 23:16
++ uint32_t ch_b_single : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
++/* Same definition for P_COMP, M_COMP, D_COMP */
++union pcu_comp_reg {
++ struct __packed {
++ uint32_t comp_disable : 1; // Bits 0:0
++ uint32_t comp_interval : 4; // Bits 4:1
++ uint32_t : 3; // Bits 7:5
++ uint32_t comp_force : 1; // Bits 8:8
++ uint32_t : 23; // Bits 31:9
++ };
++ uint32_t raw;
++};
++
++#endif
+diff --git a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+new file mode 100644
+index 0000000000..a9d960f31b
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+@@ -0,0 +1,13 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include "raminit_native.h"
++
++void configure_timings(struct sysinfo *ctrl)
++{
++ /** TODO: Stub, nothing is programmed yet and ctrl is unused **/
++}
++
++void configure_refresh(struct sysinfo *ctrl)
++{
++ /** TODO: Stub, nothing is programmed yet and ctrl is unused **/
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 45f8174995..4c3f399b5d 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -7,9 +7,98 @@
+ #define NUM_CHANNELS 2
+ #define NUM_SLOTS 2
+
++/* Indexed register helper macros */
++#define _DDRIO_C_R_B(r, ch, rank, byte) ((r) + 0x100 * (ch) + 0x4 * (rank) + 0x200 * (byte))
++#define _MCMAIN_C_X(r, ch, x) ((r) + 0x400 * (ch) + 0x4 * (x))
++#define _MCMAIN_C(r, ch) ((r) + 0x400 * (ch))
++
+ /* Register definitions */
++
++/* DDR DATA per-channel per-bytelane */
++#define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
++
++/* DDR CKE per-channel */
++#define DDR_CKE_ch_CMD_COMP_OFFSET(ch) _DDRIO_C_R_B(0x1204, ch, 0, 0)
++#define DDR_CKE_ch_CMD_PI_CODING(ch) _DDRIO_C_R_B(0x1208, ch, 0, 0)
++
++#define DDR_CKE_ch_CTL_CONTROLS(ch) _DDRIO_C_R_B(0x121c, ch, 0, 0)
++#define DDR_CKE_ch_CTL_RANKS_USED(ch) _DDRIO_C_R_B(0x1220, ch, 0, 0)
++
++/* DDR CTL per-channel */
++#define DDR_CTL_ch_CTL_CONTROLS(ch) _DDRIO_C_R_B(0x1c1c, ch, 0, 0)
++#define DDR_CTL_ch_CTL_RANKS_USED(ch) _DDRIO_C_R_B(0x1c20, ch, 0, 0)
++
++/* DDR CLK per-channel */
++#define DDR_CLK_ch_RANKS_USED(ch) _DDRIO_C_R_B(0x1800, ch, 0, 0)
++#define DDR_CLK_ch_COMP_OFFSET(ch) _DDRIO_C_R_B(0x1808, ch, 0, 0)
++#define DDR_CLK_ch_PI_CODING(ch) _DDRIO_C_R_B(0x180c, ch, 0, 0)
++#define DDR_CLK_ch_CONTROLS(ch) _DDRIO_C_R_B(0x1810, ch, 0, 0)
++
++/* DDR Scrambler */
++#define DDR_SCRAMBLE_ch(ch) (0x2000 + 4 * (ch))
++#define DDR_SCRAM_MISC_CONTROL 0x2008
++
++/* DDR CMDN/CMDS per-channel (writes go to both CMDN and CMDS fubs) */
++#define DDR_CMD_ch_COMP_OFFSET(ch) _DDRIO_C_R_B(0x3204, ch, 0, 0)
++#define DDR_CMD_ch_PI_CODING(ch) _DDRIO_C_R_B(0x3208, ch, 0, 0)
++#define DDR_CMD_ch_CONTROLS(ch) _DDRIO_C_R_B(0x320c, ch, 0, 0)
++
++/* DDR CKE/CTL per-channel (writes go to both CKE and CTL fubs) */
++#define DDR_CKE_CTL_ch_CTL_COMP_OFFSET(ch) _DDRIO_C_R_B(0x3414, ch, 0, 0)
++#define DDR_CKE_CTL_ch_CTL_PI_CODING(ch) _DDRIO_C_R_B(0x3418, ch, 0, 0)
++
++/* DDR DATA broadcast */
++#define DDR_DATA_RX_TRAIN_RANK(rank) _DDRIO_C_R_B(0x3600, 0, rank, 0)
++#define DDR_DATA_RX_PER_BIT_RANK(rank) _DDRIO_C_R_B(0x3610, 0, rank, 0)
++#define DDR_DATA_TX_TRAIN_RANK(rank) _DDRIO_C_R_B(0x3620, 0, rank, 0)
++#define DDR_DATA_TX_PER_BIT_RANK(rank) _DDRIO_C_R_B(0x3630, 0, rank, 0)
++
++#define DDR_DATA_RCOMP_DATA_1 0x3644
++#define DDR_DATA_TX_XTALK 0x3648
++#define DDR_DATA_RX_OFFSET_VDQ 0x364c
++#define DDR_DATA_OFFSET_COMP 0x365c
++#define DDR_DATA_CONTROL_1 0x3660
++
++#define DDR_DATA_OFFSET_TRAIN 0x3670
++#define DDR_DATA_CONTROL_0 0x3674
++#define DDR_DATA_VREF_ADJUST 0x3678
++
++/* DDR CMD broadcast */
++#define DDR_CMD_COMP 0x3700
++
++/* DDR CKE/CTL broadcast */
++#define DDR_CKE_CTL_COMP 0x3810
++
++/* DDR CLK broadcast */
++#define DDR_CLK_COMP 0x3904
++#define DDR_CLK_CONTROLS 0x3910
++#define DDR_CLK_CB_STATUS 0x3918
++
++/* DDR COMP (global) */
++#define DDR_COMP_DATA_COMP_1 0x3a04
++#define DDR_COMP_CMD_COMP 0x3a08
++#define DDR_COMP_CTL_COMP 0x3a0c
++#define DDR_COMP_CLK_COMP 0x3a10
++#define DDR_COMP_CTL_0 0x3a14
++#define DDR_COMP_CTL_1 0x3a18
++#define DDR_COMP_VSSHI 0x3a1c
++#define DDR_COMP_OVERRIDE 0x3a20
++#define DDR_COMP_VSSHI_CONTROL 0x3a24
++
++/* MCMAIN per-channel */
++#define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
++
++#define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
++
++/* MCMAIN broadcast */
++#define MCSCHEDS_CBIT 0x4c20
++
++#define MCMNTS_SC_WDBWM 0x4f8c
++
++/* MCDECS */
+ #define MAD_CHNL 0x5000 /* Address Decoder Channel Configuration */
+ #define MAD_DIMM(ch) (0x5004 + (ch) * 4)
++#define MAD_ZR 0x5014
+ #define MC_INIT_STATE_G 0x5030
+ #define MRC_REVISION 0x5034 /* MRC Revision */
+
+@@ -28,6 +117,8 @@
+
+ #define PCU_DDR_PTM_CTL 0x5880
+
++#define PCU_DDR_VOLTAGE 0x58a4
++
+ /* Some power MSRs are also represented in MCHBAR */
+ #define MCH_PKG_POWER_LIMIT_LO 0x59a0
+ #define MCH_PKG_POWER_LIMIT_HI 0x59a4
+@@ -48,6 +139,8 @@
+ #define MAILBOX_BIOS_CMD_FSM_MEASURE_INTVL 0x909
+ #define MAILBOX_BIOS_CMD_READ_PCH_POWER 0xa
+ #define MAILBOX_BIOS_CMD_READ_PCH_POWER_EXT 0xb
++#define MAILBOX_BIOS_CMD_READ_DDR_2X_REFRESH 0x17
++#define MAILBOX_BIOS_CMD_WRITE_DDR_2X_REFRESH 0x18
+ #define MAILBOX_BIOS_CMD_READ_C9C10_VOLTAGE 0x26
+ #define MAILBOX_BIOS_CMD_WRITE_C9C10_VOLTAGE 0x27
+
+@@ -66,6 +159,7 @@
+ #define MC_BIOS_REQ 0x5e00 /* Memory frequency request register */
+ #define MC_BIOS_DATA 0x5e04 /* Miscellaneous information for BIOS */
+ #define SAPMCTL 0x5f00
++#define M_COMP 0x5f08
+
+ #define HDAUDRID 0x6008
+ #define UMAGFXCTL 0x6020
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch b/resources/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch
new file mode 100644
index 00000000..1b88f350
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch
@@ -0,0 +1,541 @@
+From b64d728bfe7c8ee44af252338257e95d87864659 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 20:59:58 +0200
+Subject: [PATCH 14/26] haswell NRI: Add timings/refresh programming
+
+Program the registers with timing and refresh parameters.
+
+Change-Id: Id2ea339d2c9ea8b56c71d6e88ec76949653ff5c2
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/lookup_timings.c | 102 ++++++++
+ .../haswell/native_raminit/raminit_native.h | 14 ++
+ .../haswell/native_raminit/reg_structs.h | 93 +++++++
+ .../haswell/native_raminit/timings_refresh.c | 233 +++++++++++++++++-
+ .../intel/haswell/registers/mchbar.h | 12 +
+ 5 files changed, 452 insertions(+), 2 deletions(-)
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+index 038686c844..afe2c615d2 100644
+--- a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
++++ b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+@@ -60,3 +60,105 @@ uint32_t get_tXP(const uint32_t mem_clock_mhz)
+ };
+ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
+ }
++
++static uint32_t get_lpddr_tCKE(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 533, 4 },
++ { 666, 5 },
++ { fmax, 6 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++static uint32_t get_ddr_tCKE(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 533, 3 },
++ { 800, 4 },
++ { 933, 5 },
++ { 1200, 6 },
++ { fmax, 7 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++uint32_t get_tCKE(const uint32_t mem_clock_mhz, const bool lpddr)
++{
++ return lpddr ? get_lpddr_tCKE(mem_clock_mhz) : get_ddr_tCKE(mem_clock_mhz);
++}
++
++uint32_t get_tXPDLL(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 400, 10 },
++ { 533, 13 },
++ { 666, 16 },
++ { 800, 20 },
++ { 933, 23 },
++ { 1066, 26 },
++ { 1200, 29 },
++ { fmax, 32 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++uint32_t get_tAONPD(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 400, 4 },
++ { 533, 5 },
++ { 666, 6 },
++ { 800, 7 }, /* SNB had 8 */
++ { 933, 8 },
++ { 1066, 10 },
++ { 1200, 11 },
++ { fmax, 12 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++uint32_t get_tMOD(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 800, 12 },
++ { 933, 14 },
++ { 1066, 16 },
++ { 1200, 18 },
++ { fmax, 20 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++uint32_t get_tXS_offset(const uint32_t mem_clock_mhz)
++{
++ return DIV_ROUND_UP(mem_clock_mhz, 100);
++}
++
++static uint32_t get_lpddr_tZQOPER(const uint32_t mem_clock_mhz)
++{
++ return (mem_clock_mhz * 360) / 1000;
++}
++
++static uint32_t get_ddr_tZQOPER(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 800, 256 },
++ { 933, 299 },
++ { 1066, 342 },
++ { 1200, 384 },
++ { fmax, 427 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++/* tZQOPER defines the period required for ZQCL after SR exit */
++uint32_t get_tZQOPER(const uint32_t mem_clock_mhz, const bool lpddr)
++{
++ return lpddr ? get_lpddr_tZQOPER(mem_clock_mhz) : get_ddr_tZQOPER(mem_clock_mhz);
++}
++
++uint32_t get_tZQCS(const uint32_t mem_clock_mhz, const bool lpddr)
++{
++ return DIV_ROUND_UP(get_tZQOPER(mem_clock_mhz, lpddr), 4);
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index aa86b9aa39..cd1f2eb2a5 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -155,6 +155,12 @@ struct sysinfo {
+ uint8_t cke_cmd_pi_code[NUM_CHANNELS][NUM_GROUPS];
+ uint8_t cmd_north_pi_code[NUM_CHANNELS][NUM_GROUPS];
+ uint8_t cmd_south_pi_code[NUM_CHANNELS][NUM_GROUPS];
++
++ union tc_bank_reg tc_bank[NUM_CHANNELS];
++ union tc_bank_rank_a_reg tc_bankrank_a[NUM_CHANNELS];
++ union tc_bank_rank_b_reg tc_bankrank_b[NUM_CHANNELS];
++ union tc_bank_rank_c_reg tc_bankrank_c[NUM_CHANNELS];
++ union tc_bank_rank_d_reg tc_bankrank_d[NUM_CHANNELS];
+ };
+
+ static inline bool is_hsw_ult(void)
+@@ -200,6 +206,14 @@ enum raminit_status configure_mc(struct sysinfo *ctrl);
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+
++uint32_t get_tCKE(uint32_t mem_clock_mhz, bool lpddr);
++uint32_t get_tXPDLL(uint32_t mem_clock_mhz);
++uint32_t get_tAONPD(uint32_t mem_clock_mhz);
++uint32_t get_tMOD(uint32_t mem_clock_mhz);
++uint32_t get_tXS_offset(uint32_t mem_clock_mhz);
++uint32_t get_tZQOPER(uint32_t mem_clock_mhz, bool lpddr);
++uint32_t get_tZQCS(uint32_t mem_clock_mhz, bool lpddr);
++
+ enum raminit_status wait_for_first_rcomp(void);
+
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index d11cda4b3d..70487e1640 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -335,6 +335,99 @@ union mcscheds_cbit_reg {
+ uint32_t raw;
+ };
+
++union tc_bank_reg {
++ struct __packed {
++ uint32_t tRCD : 5; // Bits 4:0
++ uint32_t tRP : 5; // Bits 9:5
++ uint32_t tRAS : 6; // Bits 15:10
++ uint32_t tRDPRE : 4; // Bits 19:16
++ uint32_t tWRPRE : 6; // Bits 25:20
++ uint32_t tRRD : 4; // Bits 29:26
++ uint32_t tRPab_ext : 2; // Bits 31:30
++ };
++ uint32_t raw;
++};
++
++union tc_bank_rank_a_reg {
++ struct __packed {
++ uint32_t tCKE : 4; // Bits 3:0
++ uint32_t tFAW : 8; // Bits 11:4
++ uint32_t tRDRD_sr : 3; // Bits 14:12
++ uint32_t tRDRD_dr : 4; // Bits 18:15
++ uint32_t tRDRD_dd : 4; // Bits 22:19
++ uint32_t tRDPDEN : 5; // Bits 27:23
++ uint32_t : 1; // Bits 28:28
++ uint32_t cmd_3st_dis : 1; // Bits 29:29
++ uint32_t cmd_stretch : 2; // Bits 31:30
++ };
++ uint32_t raw;
++};
++
++union tc_bank_rank_b_reg {
++ struct __packed {
++ uint32_t tWRRD_sr : 6; // Bits 5:0
++ uint32_t tWRRD_dr : 4; // Bits 9:6
++ uint32_t tWRRD_dd : 4; // Bits 13:10
++ uint32_t tWRWR_sr : 3; // Bits 16:14
++ uint32_t tWRWR_dr : 4; // Bits 20:17
++ uint32_t tWRWR_dd : 4; // Bits 24:21
++ uint32_t tWRPDEN : 6; // Bits 30:25
++ uint32_t dec_wrd : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union tc_bank_rank_c_reg {
++ struct __packed {
++ uint32_t tXPDLL : 6; // Bits 5:0
++ uint32_t tXP : 4; // Bits 9:6
++ uint32_t tAONPD : 4; // Bits 13:10
++ uint32_t tRDWR_sr : 5; // Bits 18:14
++ uint32_t tRDWR_dr : 5; // Bits 23:19
++ uint32_t tRDWR_dd : 5; // Bits 28:24
++ uint32_t : 3; // Bits 31:29
++ };
++ uint32_t raw;
++};
++
++/* NOTE: Non-ULT only implements the lower 21 bits (odt_write_delay is 2 bits) */
++union tc_bank_rank_d_reg {
++ struct __packed {
++ uint32_t tAA : 5; // Bits 4:0
++ uint32_t tCWL : 5; // Bits 9:5
++ uint32_t tCPDED : 2; // Bits 11:10
++ uint32_t tPRPDEN : 2; // Bits 13:12
++ uint32_t odt_read_delay : 3; // Bits 16:14
++ uint32_t odt_read_duration : 2; // Bits 18:17
++ uint32_t odt_write_duration : 3; // Bits 21:19
++ uint32_t odt_write_delay : 3; // Bits 24:22
++ uint32_t odt_always_rank_0 : 1; // Bits 25:25
++ uint32_t cmd_delay : 2; // Bits 27:26
++ uint32_t : 4; // Bits 31:28
++ };
++ uint32_t raw;
++};
++
++union tc_rftp_reg {
++ struct __packed {
++ uint32_t tREFI : 16; // Bits 15:0
++ uint32_t tRFC : 9; // Bits 24:16
++ uint32_t tREFIx9 : 7; // Bits 31:25
++ };
++ uint32_t raw;
++};
++
++union tc_srftp_reg {
++ struct __packed {
++ uint32_t tXSDLL : 12; // Bits 11:0
++ uint32_t tXS_offset : 4; // Bits 15:12
++ uint32_t tZQOPER : 10; // Bits 25:16
++ uint32_t : 2; // Bits 27:26
++ uint32_t tMOD : 4; // Bits 31:28
++ };
++ uint32_t raw;
++};
++
+ union mcmain_command_rate_limit_reg {
+ struct __packed {
+ uint32_t enable_cmd_limit : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+index a9d960f31b..20a05b359b 100644
+--- a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
++++ b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+@@ -1,13 +1,242 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
++#include <assert.h>
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++
+ #include "raminit_native.h"
+
++#define BL 8 /* Burst length */
++#define tCCD 4
++#define tRPRE 1
++#define tWPRE 1
++#define tDLLK 512
++
++static bool is_sodimm(const enum spd_dimm_type_ddr3 type)
++{
++ return type == SPD_DDR3_DIMM_TYPE_SO_DIMM || type == SPD_DDR3_DIMM_TYPE_72B_SO_UDIMM;
++}
++
++static uint8_t get_odt_stretch(const struct sysinfo *const ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ /* Only stretch with 2 DIMMs per channel */
++ if (ctrl->dpc[channel] != 2)
++ continue;
++
++ const struct raminit_dimm_info *dimms = ctrl->dimms[channel];
++
++ /* Only stretch when using SO-DIMMs */
++ if (!is_sodimm(dimms[0].data.dimm_type) || !is_sodimm(dimms[1].data.dimm_type))
++ continue;
++
++ /* Only stretch with mismatched card types */
++ if (dimms[0].data.reference_card == dimms[1].data.reference_card)
++ continue;
++
++ /* Stretch if one SO-DIMM is card F */
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (dimms[slot].data.reference_card == 5)
++ return 1;
++ }
++ }
++ return 0;
++}
++
++static union tc_bank_reg make_tc_bank(struct sysinfo *const ctrl)
++{
++ return (union tc_bank_reg) {
++ .tRCD = ctrl->tRCD,
++ .tRP = ctrl->tRP,
++ .tRAS = ctrl->tRAS,
++ .tRDPRE = ctrl->tRTP,
++ .tWRPRE = 4 + ctrl->tCWL + ctrl->tWR,
++ .tRRD = ctrl->tRRD,
++ .tRPab_ext = 0, /** TODO: For LPDDR, this is ctrl->tRPab - ctrl->tRP **/
++ };
++}
++
++static union tc_bank_rank_a_reg make_tc_bankrank_a(struct sysinfo *ctrl, uint8_t odt_stretch)
++{
++ /* Use 3N mode for DDR during training, but always use 1N mode for LPDDR */
++ const uint32_t tCMD = ctrl->lpddr ? 0 : 3;
++ const uint32_t tRDRD_drdd = BL / 2 + 1 + tRPRE + odt_stretch + !!ctrl->lpddr;
++
++ return (union tc_bank_rank_a_reg) {
++ .tCKE = get_tCKE(ctrl->mem_clock_mhz, ctrl->lpddr),
++ .tFAW = ctrl->tFAW,
++ .tRDRD_sr = tCCD,
++ .tRDRD_dr = tRDRD_drdd,
++ .tRDRD_dd = tRDRD_drdd,
++ .tRDPDEN = ctrl->tAA + BL / 2 + 1,
++ .cmd_3st_dis = 1, /* Disable command tri-state before training */
++ .cmd_stretch = tCMD,
++ };
++}
++
++static union tc_bank_rank_b_reg make_tc_bankrank_b(struct sysinfo *const ctrl)
++{
++ const uint8_t tWRRD_drdd = ctrl->tCWL - ctrl->tAA + BL / 2 + 2 + tRPRE;
++ const uint8_t tWRWR_drdd = BL / 2 + 2 + tWPRE;
++
++ return (union tc_bank_rank_b_reg) {
++ .tWRRD_sr = tCCD + ctrl->tCWL + ctrl->tWTR + 2,
++ .tWRRD_dr = ctrl->lpddr ? 8 : tWRRD_drdd,
++ .tWRRD_dd = ctrl->lpddr ? 8 : tWRRD_drdd,
++ .tWRWR_sr = tCCD,
++ .tWRWR_dr = tWRWR_drdd,
++ .tWRWR_dd = tWRWR_drdd,
++ .tWRPDEN = ctrl->tWR + ctrl->tCWL + BL / 2,
++ .dec_wrd = ctrl->tCWL >= 6,
++ };
++}
++
++static uint32_t get_tRDWR_sr(const struct sysinfo *ctrl)
++{
++ if (ctrl->lpddr) {
++ const uint32_t tdqsck_max = DIV_ROUND_UP(5500, ctrl->qclkps * 2);
++ return ctrl->tAA - ctrl->tCWL + tCCD + tWPRE + tdqsck_max + 1;
++ } else {
++ const bool fast_clock = ctrl->mem_clock_mhz > 666;
++ return ctrl->tAA - ctrl->tCWL + tCCD + tWPRE + 2 + fast_clock;
++ }
++}
++
++static union tc_bank_rank_c_reg make_tc_bankrank_c(struct sysinfo *ctrl, uint8_t odt_stretch)
++{
++ const uint32_t tRDWR_sr = get_tRDWR_sr(ctrl);
++ const uint32_t tRDWR_drdd = tRDWR_sr + odt_stretch;
++
++ return (union tc_bank_rank_c_reg) {
++ .tXPDLL = get_tXPDLL(ctrl->mem_clock_mhz),
++ .tXP = MAX(ctrl->tXP, 7), /* Use a higher tXP for training */
++ .tAONPD = get_tAONPD(ctrl->mem_clock_mhz),
++ .tRDWR_sr = tRDWR_sr,
++ .tRDWR_dr = tRDWR_drdd,
++ .tRDWR_dd = tRDWR_drdd,
++ };
++}
++
++static union tc_bank_rank_d_reg make_tc_bankrank_d(struct sysinfo *ctrl, uint8_t odt_stretch)
++{
++ const uint32_t odt_rd_delay = ctrl->tAA - ctrl->tCWL;
++ if (!ctrl->lpddr) {
++ return (union tc_bank_rank_d_reg) {
++ .tAA = ctrl->tAA,
++ .tCWL = ctrl->tCWL,
++ .tCPDED = 1,
++ .tPRPDEN = 1,
++ .odt_read_delay = odt_rd_delay,
++ .odt_read_duration = odt_stretch,
++ };
++ }
++
++ /* tCWL has 1 extra clock because of tDQSS, subtract it here */
++ const uint32_t tCWL_lpddr = ctrl->tCWL - 1;
++ const uint32_t odt_wr_delay = tCWL_lpddr + DIV_ROUND_UP(3500, ctrl->qclkps * 2);
++ const uint32_t odt_wr_duration = DIV_ROUND_UP(3500 - 1750, ctrl->qclkps * 2) + 1;
++
++ return (union tc_bank_rank_d_reg) {
++ .tAA = ctrl->tAA,
++ .tCWL = tCWL_lpddr,
++ .tCPDED = 2, /* Required by JEDEC LPDDR3 spec */
++ .tPRPDEN = 1,
++ .odt_read_delay = odt_rd_delay,
++ .odt_read_duration = odt_stretch,
++ .odt_write_delay = odt_wr_delay,
++ .odt_write_duration = odt_wr_duration,
++ .odt_always_rank_0 = ctrl->lpddr_dram_odt
++ };
++}
++
++/* ZQCS period values, in (tREFI * 128) units */
++#define ZQCS_PERIOD_DDR3 128 /* tREFI * 128 = 7.8 us * 128 = 1ms */
++#define ZQCS_PERIOD_LPDDR3 256 /* tREFI * 128 = 3.9 us * 128 = 0.5ms */
++
++static uint32_t make_tc_zqcal(const struct sysinfo *const ctrl)
++{
++ const uint32_t zqcs_period = ctrl->lpddr ? ZQCS_PERIOD_LPDDR3 : ZQCS_PERIOD_DDR3;
++ const uint32_t tZQCS = get_tZQCS(ctrl->mem_clock_mhz, ctrl->lpddr);
++ return tZQCS << (is_hsw_ult() ? 10 : 8) | zqcs_period;
++}
++
++static union tc_rftp_reg make_tc_rftp(const struct sysinfo *const ctrl)
++{
++ /*
++ * The tREFIx9 field should be programmed to minimum of 8.9 * tREFI (to allow
++ * for possible delays from ZQ or isoc) and tRASmax (70us) divided by 1024.
++ */
++ return (union tc_rftp_reg) {
++ .tREFI = ctrl->tREFI,
++ .tRFC = ctrl->tRFC,
++ .tREFIx9 = ctrl->tREFI * 89 / 10240,
++ };
++}
++
++static union tc_srftp_reg make_tc_srftp(const struct sysinfo *const ctrl)
++{
++ return (union tc_srftp_reg) {
++ .tXSDLL = tDLLK,
++ .tXS_offset = get_tXS_offset(ctrl->mem_clock_mhz),
++ .tZQOPER = get_tZQOPER(ctrl->mem_clock_mhz, ctrl->lpddr),
++ .tMOD = get_tMOD(ctrl->mem_clock_mhz) - 8,
++ };
++}
++
+ void configure_timings(struct sysinfo *ctrl)
+ {
+- /** TODO: Stub **/
++ if (ctrl->lpddr)
++ die("%s: Missing support for LPDDR\n", __func__);
++
++ const uint8_t odt_stretch = get_odt_stretch(ctrl);
++ const union tc_bank_reg tc_bank = make_tc_bank(ctrl);
++ const union tc_bank_rank_a_reg tc_bank_rank_a = make_tc_bankrank_a(ctrl, odt_stretch);
++ const union tc_bank_rank_b_reg tc_bank_rank_b = make_tc_bankrank_b(ctrl);
++ const union tc_bank_rank_c_reg tc_bank_rank_c = make_tc_bankrank_c(ctrl, odt_stretch);
++ const union tc_bank_rank_d_reg tc_bank_rank_d = make_tc_bankrank_d(ctrl, odt_stretch);
++
++ const uint8_t wr_delay = tc_bank_rank_b.dec_wrd + 1; /* 1 or 2: dec_wrd is a 1-bit field */
++ uint8_t sc_wr_add_delay = 0; /* Same 2-bit delay replicated at bit offsets 0, 2, 4 and 6 */
++ sc_wr_add_delay |= wr_delay << 0;
++ sc_wr_add_delay |= wr_delay << 2;
++ sc_wr_add_delay |= wr_delay << 4;
++ sc_wr_add_delay |= wr_delay << 6;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ ctrl->tc_bank[channel] = tc_bank; /* Keep per-channel copies in ctrl; writes below use them */
++ ctrl->tc_bankrank_a[channel] = tc_bank_rank_a;
++ ctrl->tc_bankrank_b[channel] = tc_bank_rank_b;
++ ctrl->tc_bankrank_c[channel] = tc_bank_rank_c;
++ ctrl->tc_bankrank_d[channel] = tc_bank_rank_d;
++
++ mchbar_write32(TC_BANK_ch(channel), ctrl->tc_bank[channel].raw);
++ mchbar_write32(TC_BANK_RANK_A_ch(channel), ctrl->tc_bankrank_a[channel].raw);
++ mchbar_write32(TC_BANK_RANK_B_ch(channel), ctrl->tc_bankrank_b[channel].raw);
++ mchbar_write32(TC_BANK_RANK_C_ch(channel), ctrl->tc_bankrank_c[channel].raw);
++ mchbar_write32(TC_BANK_RANK_D_ch(channel), ctrl->tc_bankrank_d[channel].raw);
++ mchbar_write8(SC_WR_ADD_DELAY_ch(channel), sc_wr_add_delay);
++ }
+ }
+
+ void configure_refresh(struct sysinfo *ctrl)
+ {
+- /** TODO: Stub **/
++ const union tc_srftp_reg tc_srftp = make_tc_srftp(ctrl);
++ const union tc_rftp_reg tc_rftp = make_tc_rftp(ctrl);
++ const uint32_t tc_zqcal = make_tc_zqcal(ctrl);
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ mchbar_setbits32(TC_RFP_ch(channel), 0xff);
++ mchbar_write32(TC_RFTP_ch(channel), tc_rftp.raw);
++ mchbar_write32(TC_SRFTP_ch(channel), tc_srftp.raw);
++ mchbar_write32(TC_ZQCAL_ch(channel), tc_zqcal);
++ }
+ }
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 4c3f399b5d..2acc5cbbc8 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -86,9 +86,21 @@
+ #define DDR_COMP_VSSHI_CONTROL 0x3a24
+
+ /* MCMAIN per-channel */
++#define TC_BANK_ch(ch) _MCMAIN_C(0x4000, ch)
++#define TC_BANK_RANK_A_ch(ch) _MCMAIN_C(0x4004, ch)
++#define TC_BANK_RANK_B_ch(ch) _MCMAIN_C(0x4008, ch)
++#define TC_BANK_RANK_C_ch(ch) _MCMAIN_C(0x400c, ch)
+ #define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
++#define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
++#define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+
++#define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
++
++#define TC_ZQCAL_ch(ch) _MCMAIN_C(0x4290, ch)
++#define TC_RFP_ch(ch) _MCMAIN_C(0x4294, ch)
++#define TC_RFTP_ch(ch) _MCMAIN_C(0x4298, ch)
+ #define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
++#define TC_SRFTP_ch(ch) _MCMAIN_C(0x42a4, ch)
+
+ /* MCMAIN broadcast */
+ #define MCSCHEDS_CBIT 0x4c20
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch b/resources/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch
new file mode 100644
index 00000000..ad8527b2
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch
@@ -0,0 +1,263 @@
+From 89ff35083af68d1b24c1633886202ecc153af67d Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 21:24:50 +0200
+Subject: [PATCH 15/26] haswell NRI: Program memory map
+
+This is very similar to Sandy/Ivy Bridge, except that there's several
+registers to program in GDXCBAR. One of these GDXCBAR registers has a
+lock bit that must be set in order for the memory controller to allow
+normal access to DRAM. And it took me four months to realize this one
+bit was the only reason why native raminit did not work.
+
+Change-Id: I3af73a018a7ba948701a542e661e7fefd57591fe
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../intel/haswell/native_raminit/memory_map.c | 183 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 1 +
+ .../intel/haswell/registers/host_bridge.h | 2 +
+ 5 files changed, 188 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/memory_map.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index fc55277a65..37d527e972 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -4,6 +4,7 @@ romstage-y += configure_mc.c
+ romstage-y += lookup_timings.c
+ romstage-y += init_mpll.c
+ romstage-y += io_comp_control.c
++romstage-y += memory_map.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+ romstage-y += spd_bitmunching.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/memory_map.c b/src/northbridge/intel/haswell/native_raminit/memory_map.c
+new file mode 100644
+index 0000000000..e3aded2b37
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/memory_map.c
+@@ -0,0 +1,183 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <southbridge/intel/lynxpoint/me.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++/* GDXCBAR */
++#define MPCOHTRK_GDXC_MOT_ADDRESS_LO 0x10
++#define MPCOHTRK_GDXC_MOT_ADDRESS_HI 0x14
++#define MPCOHTRK_GDXC_MOT_REGION 0x18
++
++#define MPCOHTRK_GDXC_OCLA_ADDRESS_LO 0x20
++#define MPCOHTRK_GDXC_OCLA_ADDRESS_HI 0x24
++#define MPCOHTRK_GDXC_OCLA_REGION 0x28
++
++/* This lock bit made me lose what little sanity I had left. - Angel Pons */
++#define MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK BIT(2)
++
++static inline uint32_t gdxcbar_read32(const uintptr_t offset)
++{
++ return read32p((mchbar_read32(GDXCBAR) & ~1) + offset);
++}
++
++static inline void gdxcbar_write32(const uintptr_t offset, const uint32_t value)
++{
++ write32p((mchbar_read32(GDXCBAR) & ~1) + offset, value);
++}
++
++static inline void gdxcbar_clrsetbits32(const uintptr_t offset, uint32_t clear, uint32_t set)
++{
++ const uintptr_t address = (mchbar_read32(GDXCBAR) & ~1) + offset;
++ clrsetbits32((void *)address, clear, set);
++}
++
++#define gdxcbar_setbits32(offset, set) gdxcbar_clrsetbits32(offset, 0, set)
++#define gdxcbar_clrbits32(offset, clear) gdxcbar_clrsetbits32(offset, clear, 0)
++
++/* All values stored in here (except the bool) are specified in MiB */
++struct memory_map_data {
++ uint32_t dpr_size;
++ uint32_t tseg_size;
++ uint32_t gtt_size;
++ uint32_t gms_size;
++ uint32_t me_stolen_size;
++ uint32_t mmio_size;
++ uint32_t touud;
++ uint32_t remaplimit;
++ uint32_t remapbase;
++ uint32_t tom;
++ uint32_t tom_minus_me;
++ uint32_t tolud;
++ uint32_t bdsm_base;
++ uint32_t gtt_base;
++ uint32_t tseg_base;
++ bool reclaim_possible;
++};
++
++static void compute_memory_map(struct memory_map_data *map)
++{
++ map->tom_minus_me = map->tom - map->me_stolen_size;
++
++ /*
++ * MMIO size will actually be slightly smaller than computed,
++ * but matches what MRC does and is more MTRR-friendly given
++ * that TSEG is treated as WB, but SMRR makes TSEG UC anyway.
++ */
++ const uint32_t mmio_size = MIN(map->tom_minus_me, 4096) / 2;
++ map->gtt_base = ALIGN_DOWN(mmio_size, map->tseg_size);
++ map->tseg_base = map->gtt_base - map->tseg_size;
++ map->bdsm_base = map->gtt_base + map->gtt_size;
++ map->tolud = map->bdsm_base + map->gms_size;
++ map->reclaim_possible = map->tom_minus_me > map->tolud;
++
++ if (map->reclaim_possible) {
++ map->remapbase = MAX(4096, map->tom_minus_me);
++ map->touud = MIN(4096, map->tom_minus_me) + map->remapbase - map->tolud;
++ map->remaplimit = map->touud - 1;
++ } else {
++ map->remapbase = 0;
++ map->remaplimit = 0;
++ map->touud = map->tom_minus_me;
++ }
++}
++
++static void display_memory_map(const struct memory_map_data *map)
++{
++ if (!CONFIG(DEBUG_RAM_SETUP))
++ return;
++
++ printk(BIOS_DEBUG, "============ MEMORY MAP ============\n");
++ printk(BIOS_DEBUG, "\n");
++ printk(BIOS_DEBUG, "dpr_size = %u MiB\n", map->dpr_size);
++ printk(BIOS_DEBUG, "tseg_size = %u MiB\n", map->tseg_size);
++ printk(BIOS_DEBUG, "gtt_size = %u MiB\n", map->gtt_size);
++ printk(BIOS_DEBUG, "gms_size = %u MiB\n", map->gms_size);
++ printk(BIOS_DEBUG, "me_stolen_size = %u MiB\n", map->me_stolen_size);
++ printk(BIOS_DEBUG, "\n");
++ printk(BIOS_DEBUG, "touud = %u MiB\n", map->touud);
++ printk(BIOS_DEBUG, "remaplimit = %u MiB\n", map->remaplimit);
++ printk(BIOS_DEBUG, "remapbase = %u MiB\n", map->remapbase);
++ printk(BIOS_DEBUG, "tom = %u MiB\n", map->tom);
++ printk(BIOS_DEBUG, "tom_minus_me = %u MiB\n", map->tom_minus_me);
++ printk(BIOS_DEBUG, "tolud = %u MiB\n", map->tolud);
++ printk(BIOS_DEBUG, "bdsm_base = %u MiB\n", map->bdsm_base);
++ printk(BIOS_DEBUG, "gtt_base = %u MiB\n", map->gtt_base);
++ printk(BIOS_DEBUG, "tseg_base = %u MiB\n", map->tseg_base);
++ printk(BIOS_DEBUG, "\n");
++ printk(BIOS_DEBUG, "reclaim_possible = %s\n", map->reclaim_possible ? "Yes" : "No");
++}
++
++static void map_write_reg64(const uint16_t reg, const uint64_t size)
++{
++ const uint64_t value = size << 20;
++ pci_write_config32(HOST_BRIDGE, reg + 4, value >> 32);
++ pci_write_config32(HOST_BRIDGE, reg + 0, value >> 0);
++}
++
++static void map_write_reg32(const uint16_t reg, const uint32_t size)
++{
++ const uint32_t value = size << 20;
++ pci_write_config32(HOST_BRIDGE, reg, value);
++}
++
++static void program_memory_map(const struct memory_map_data *map)
++{
++ map_write_reg64(TOUUD, map->touud);
++ map_write_reg64(TOM, map->tom);
++ if (map->reclaim_possible) {
++ map_write_reg64(REMAPBASE, map->remapbase);
++ map_write_reg64(REMAPLIMIT, map->remaplimit);
++ }
++ if (map->me_stolen_size) {
++ map_write_reg64(MESEG_LIMIT, 0x80000 - map->me_stolen_size);
++ map_write_reg64(MESEG_BASE, map->tom_minus_me);
++ pci_or_config32(HOST_BRIDGE, MESEG_LIMIT, ME_STLEN_EN);
++ }
++ map_write_reg32(TOLUD, map->tolud);
++ map_write_reg32(BDSM, map->bdsm_base);
++ map_write_reg32(BGSM, map->gtt_base);
++ map_write_reg32(TSEG, map->tseg_base);
++
++ const uint32_t dpr_reg = map->tseg_base << 20 | map->dpr_size << 4;
++ pci_write_config32(HOST_BRIDGE, DPR, dpr_reg);
++
++ const uint16_t gfx_stolen_size = GGC_IGD_MEM_IN_32MB_UNITS(map->gms_size / 32);
++ const uint16_t ggc = map->gtt_size << 8 | gfx_stolen_size;
++ pci_write_config16(HOST_BRIDGE, GGC, ggc);
++
++ /** TODO: Do not hardcode these? GDXC has weird alignment requirements, though. **/
++ gdxcbar_write32(MPCOHTRK_GDXC_MOT_ADDRESS_LO, 0);
++ gdxcbar_write32(MPCOHTRK_GDXC_MOT_ADDRESS_HI, 0);
++ gdxcbar_write32(MPCOHTRK_GDXC_MOT_REGION, 0);
++
++ gdxcbar_write32(MPCOHTRK_GDXC_OCLA_ADDRESS_LO, 0);
++ gdxcbar_write32(MPCOHTRK_GDXC_OCLA_ADDRESS_HI, 0);
++ gdxcbar_write32(MPCOHTRK_GDXC_OCLA_REGION, 0);
++
++ gdxcbar_setbits32(MPCOHTRK_GDXC_OCLA_ADDRESS_HI, MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK);
++}
++
++enum raminit_status configure_memory_map(struct sysinfo *ctrl)
++{
++ struct memory_map_data memory_map = {
++ .tom = ctrl->channel_size_mb[0] + ctrl->channel_size_mb[1],
++ .dpr_size = CONFIG_INTEL_TXT_DPR_SIZE,
++ .tseg_size = CONFIG_SMM_TSEG_SIZE >> 20,
++ .me_stolen_size = intel_early_me_uma_size(),
++ };
++ /** FIXME: MRC hardcodes iGPU parameters, but we should not **/
++ const bool igpu_on = pci_read_config32(HOST_BRIDGE, DEVEN) & DEVEN_D2EN;
++ if (CONFIG(ONBOARD_VGA_IS_PRIMARY) || igpu_on) {
++ memory_map.gtt_size = 2;
++ memory_map.gms_size = 64;
++ pci_or_config32(HOST_BRIDGE, DEVEN, DEVEN_D2EN);
++ }
++ compute_memory_map(&memory_map);
++ display_memory_map(&memory_map);
++ program_memory_map(&memory_map);
++ return 0;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 3a773cfa19..136a8ba989 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -24,6 +24,7 @@ static const struct task_entry cold_boot[] = {
+ { initialise_mpll, true, "INITMPLL", },
+ { convert_timings, true, "CONVTIM", },
+ { configure_mc, true, "CONFMC", },
++ { configure_memory_map, true, "MEMMAP", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index cd1f2eb2a5..4763b25e8d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -202,6 +202,7 @@ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
+ enum raminit_status initialise_mpll(struct sysinfo *ctrl);
+ enum raminit_status convert_timings(struct sysinfo *ctrl);
+ enum raminit_status configure_mc(struct sysinfo *ctrl);
++enum raminit_status configure_memory_map(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/registers/host_bridge.h b/src/northbridge/intel/haswell/registers/host_bridge.h
+index 1ee0ab2890..0228cf6bb9 100644
+--- a/src/northbridge/intel/haswell/registers/host_bridge.h
++++ b/src/northbridge/intel/haswell/registers/host_bridge.h
+@@ -34,6 +34,8 @@
+
+ #define MESEG_BASE 0x70 /* Management Engine Base */
+ #define MESEG_LIMIT 0x78 /* Management Engine Limit */
++#define MELCK (1 << 10) /* ME Range Lock */
++#define ME_STLEN_EN (1 << 11) /* ME Stolen Memory Enable */
+
+ #define PAM0 0x80
+ #define PAM1 0x81
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch b/resources/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch
new file mode 100644
index 00000000..c321d239
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch
@@ -0,0 +1,1038 @@
+From d24def01ec15f41a48331ef1e236270b2df90b84 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 21:49:40 +0200
+Subject: [PATCH 16/26] haswell NRI: Add DDR3 JEDEC reset and init
+
+Implement JEDEC reset and init sequence for DDR3. The MRS commands are
+issued through the REUT (Robust Electrical Unified Testing) hardware.
+
+Change-Id: I2a0c066537021b587599228086727cb1e041bff5
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 3 +
+ .../intel/haswell/native_raminit/ddr3.c | 217 ++++++++++++++++++
+ .../haswell/native_raminit/io_comp_control.c | 19 ++
+ .../haswell/native_raminit/jedec_reset.c | 120 ++++++++++
+ .../haswell/native_raminit/raminit_main.c | 2 +
+ .../haswell/native_raminit/raminit_native.h | 101 ++++++++
+ .../haswell/native_raminit/reg_structs.h | 154 +++++++++++++
+ .../intel/haswell/native_raminit/reut.c | 196 ++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 21 ++
+ src/southbridge/intel/lynxpoint/pch.h | 2 +
+ 10 files changed, 835 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/ddr3.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/jedec_reset.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/reut.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 37d527e972..e9212df9e6 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,11 +1,14 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
+ romstage-y += configure_mc.c
++romstage-y += ddr3.c
++romstage-y += jedec_reset.c
+ romstage-y += lookup_timings.c
+ romstage-y += init_mpll.c
+ romstage-y += io_comp_control.c
+ romstage-y += memory_map.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
++romstage-y += reut.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += timings_refresh.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/ddr3.c b/src/northbridge/intel/haswell/native_raminit/ddr3.c
+new file mode 100644
+index 0000000000..6ddb11488b
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/ddr3.c
+@@ -0,0 +1,217 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++#define DDR3_RTTNOM(a, b, c) (((a) << 9) | ((b) << 6) | ((c) << 2))
++
++uint16_t encode_ddr3_rttnom(const uint32_t rttnom)
++{ /* Translate an RttNom value into its MR1 bit pattern (MR1 bits 9, 6 and 2) */
++ switch (rttnom) {
++ case 0: return DDR3_RTTNOM(0, 0, 0); /* RttNom is disabled */
++ case 20: return DDR3_RTTNOM(1, 0, 0); /* RZQ/12 */
++ case 30: return DDR3_RTTNOM(1, 0, 1); /* RZQ/8 */
++ case 40: return DDR3_RTTNOM(0, 1, 1); /* RZQ/6 */
++ case 60: return DDR3_RTTNOM(0, 0, 1); /* RZQ/4 */
++ case 120: return DDR3_RTTNOM(0, 1, 0); /* RZQ/2 */
++ }
++ printk(BIOS_ERR, "%s: Invalid rtt_nom value %u\n", __func__, rttnom);
++ return 0; /* Unsupported values yield the same encoding as 'disabled' */
++}
++
++static const uint8_t jedec_wr_t[12] = { 1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 0, 0 };
++
++static void ddr3_program_mr0(struct sysinfo *ctrl, const uint8_t dll_reset)
++{ /* Build MR0 from tAA/tWR and issue it on every existing channel; dll_reset sets MR0 bit 8 */
++ assert(ctrl->tWR >= 5 && ctrl->tWR <= 16); /* Keeps the jedec_wr_t[] index within 0..11 */
++ assert(ctrl->tAA >= 4); /* Prevents the subtraction below from wrapping */
++ const uint8_t jedec_cas = ctrl->tAA - 4;
++ const union {
++ struct __packed {
++ uint16_t burst_length : 2; // Bits 1:0
++ uint16_t cas_latency_msb : 1; // Bits 2:2
++ uint16_t read_burst_type : 1; // Bits 3:3
++ uint16_t cas_latency_low : 3; // Bits 6:4
++ uint16_t test_mode : 1; // Bits 7:7
++ uint16_t dll_reset : 1; // Bits 8:8
++ uint16_t write_recovery : 3; // Bits 11:9
++ uint16_t precharge_pd_dll : 1; // Bits 12:12
++ uint16_t : 3; // Bits 15:13
++ };
++ uint16_t raw;
++ } mr0reg = {
++ .burst_length = 0,
++ .cas_latency_msb = !!(jedec_cas & BIT(3)),
++ .read_burst_type = 0,
++ .cas_latency_low = jedec_cas & 0x7,
++ .dll_reset = !!dll_reset, /* Honour the caller's request instead of hardcoding 1 */
++ .write_recovery = jedec_wr_t[ctrl->tWR - 5],
++ .precharge_pd_dll = 0,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel)) /* Tests rank 2 * slot */
++ continue;
++
++ if (!ctrl->restore_mrs) /* Saved copy is left untouched when restoring */
++ ctrl->mr0[channel][slot] = mr0reg.raw;
++ }
++ reut_issue_mrs_all(ctrl, channel, 0, ctrl->mr0[channel]); /* Sends the saved per-slot values */
++ }
++}
++
++void ddr3_program_mr1(struct sysinfo *ctrl, const uint8_t wl_mode, const uint8_t q_off)
++{ /* Build MR1 (wl_mode -> bit 7, q_off -> bit 12) and issue it on every existing channel */
++ /*
++ * JESD79-3F (JEDEC DDR3 spec) refers to bit 0 of MR1 as 'DLL Enable'.
++ * However, its encoding is weird, and 'DLL Disable' makes more sense.
++ *
++ * Moreover, bit 5 is part of ODIC (Output Driver Impedance Control),
++ * but all encodings where MR1 bit 5 is 1 are reserved. Thus, omit it.
++ */
++ union {
++ struct __packed {
++ uint16_t dll_disable : 1; // Bits 0:0
++ uint16_t od_impedance_ctl : 1; // Bits 1:1
++ uint16_t odt_rtt_nom_low : 1; // Bits 2:2
++ uint16_t additive_latency : 2; // Bits 4:3
++ uint16_t : 1; // Bits 5:5
++ uint16_t odt_rtt_nom_mid : 1; // Bits 6:6
++ uint16_t write_level_mode : 1; // Bits 7:7
++ uint16_t : 1; // Bits 8:8
++ uint16_t odt_rtt_nom_high : 1; // Bits 9:9
++ uint16_t : 1; // Bits 10:10
++ uint16_t t_dqs : 1; // Bits 11:11
++ uint16_t q_off : 1; // Bits 12:12
++ uint16_t : 3; // Bits 15:13
++ };
++ uint16_t raw;
++ } mr1reg = {
++ .dll_disable = 0,
++ .od_impedance_ctl = 1, /* RZQ/7 */
++ .additive_latency = 0,
++ .write_level_mode = wl_mode,
++ .t_dqs = 0,
++ .q_off = q_off,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ mr1reg.raw &= ~RTTNOM_MASK; /* Drop RttNom bits set for the previous channel */
++ mr1reg.raw |= encode_ddr3_rttnom(ctrl->dpc[channel] == 2 ? 60 : 0); /* RttNom 60 only if dpc is 2 */
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel)) /* Tests rank 2 * slot */
++ continue;
++
++ if (!ctrl->restore_mrs) /* Saved copy is left untouched when restoring */
++ ctrl->mr1[channel][slot] = mr1reg.raw;
++ }
++ reut_issue_mrs_all(ctrl, channel, 1, ctrl->mr1[channel]); /* Sends the saved per-slot values */
++ }
++}
++
++enum {
++ RTT_WR_OFF = 0,
++ RTT_WR_60 = 1,
++ RTT_WR_120 = 2,
++};
++
++static void ddr3_program_mr2(struct sysinfo *ctrl)
++{ /* Build MR2, mirror it into TC_MR2_SHADOW, and issue it on every existing channel */
++ assert(ctrl->tCWL >= 5); /* Prevents 'tCWL - 5' below from wrapping */
++ const bool dimm_srt = ctrl->flags.ext_temp_refresh && !ctrl->flags.asr; /* SRT only when ASR is not used */
++
++ const union {
++ struct __packed {
++ uint16_t partial_array_sr : 3; // Bits 2:0
++ uint16_t cas_write_latency : 3; // Bits 5:3
++ uint16_t auto_self_refresh : 1; // Bits 6:6
++ uint16_t self_refresh_temp : 1; // Bits 7:7
++ uint16_t : 1; // Bits 8:8
++ uint16_t odt_rtt_wr : 2; // Bits 10:9
++ uint16_t : 5; // Bits 15:11
++ };
++ uint16_t raw;
++ } mr2reg = {
++ .partial_array_sr = 0,
++ .cas_write_latency = ctrl->tCWL - 5,
++ .auto_self_refresh = ctrl->flags.asr,
++ .self_refresh_temp = dimm_srt,
++ .odt_rtt_wr = is_hsw_ult() ? RTT_WR_120 : RTT_WR_60,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel)) /* Tests rank 2 * slot */
++ continue;
++
++ if (!ctrl->restore_mrs) /* Saved copy is left untouched when restoring */
++ ctrl->mr2[channel][slot] = mr2reg.raw;
++ }
++ /* MR2 shadow register is similar but not identical to MR2 */
++ if (!ctrl->restore_mrs) {
++ union tc_mr2_shadow_reg tc_mr2_shadow = {
++ .raw = mr2reg.raw & 0x073f, /* Keep MR2 bits 10:8 and 5:0 only */
++ };
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel))
++ continue;
++
++ if (dimm_srt)
++ tc_mr2_shadow.srt_available |= BIT(slot);
++
++ if (ctrl->rank_mirrored[channel] & BIT(slot + slot + 1)) /* Tests rank 2 * slot + 1 */
++ tc_mr2_shadow.addr_bit_swizzle |= BIT(slot);
++ }
++ mchbar_write32(TC_MR2_SHADOW_ch(channel), tc_mr2_shadow.raw);
++ }
++ reut_issue_mrs_all(ctrl, channel, 2, ctrl->mr2[channel]); /* Sends the saved per-slot values */
++ }
++}
++
++static void ddr3_program_mr3(struct sysinfo *ctrl, const uint8_t mpr_mode)
++{ /* Build MR3 (mpr_mode -> bit 2, MPR location 0) and issue it on every existing channel */
++ const union {
++ struct __packed {
++ uint16_t mpr_loc : 2; // Bits 1:0
++ uint16_t mpr_mode : 1; // Bits 2:2
++ uint16_t : 13; // Bits 15:3
++ };
++ uint16_t raw;
++ } mr3reg = {
++ .mpr_loc = 0,
++ .mpr_mode = mpr_mode,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel)) /* Tests rank 2 * slot */
++ continue;
++
++ if (!ctrl->restore_mrs) /* Saved copy is left untouched when restoring */
++ ctrl->mr3[channel][slot] = mr3reg.raw;
++ }
++ reut_issue_mrs_all(ctrl, channel, 3, ctrl->mr3[channel]); /* Sends the saved per-slot values */
++ }
++}
++
++enum raminit_status ddr3_jedec_init(struct sysinfo *ctrl)
++{ /* Program the mode registers in the order MR2, MR3, MR1, MR0, then run ZQ init */
++ ddr3_program_mr2(ctrl);
++ ddr3_program_mr3(ctrl, 0); /* mpr_mode = 0 */
++ ddr3_program_mr1(ctrl, 0, 0); /* wl_mode = 0, q_off = 0 */
++ ddr3_program_mr0(ctrl, 1); /* dll_reset = 1 */
++ return reut_issue_zq(ctrl, ctrl->chanmap, ZQ_INIT); /* Only the ZQ step reports a status */
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/io_comp_control.c b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+index 7e96c08938..ad8c848e57 100644
+--- a/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
++++ b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+@@ -8,6 +8,25 @@
+
+ #include "raminit_native.h"
+
++enum raminit_status io_reset(void)
++{ /* Set MC_INIT_STATE_G.reset_io and poll until it reads back as 0 (2 second limit) */
++ union mc_init_state_g_reg mc_init_state_g = {
++ .raw = mchbar_read32(MC_INIT_STATE_G),
++ };
++ mc_init_state_g.reset_io = 1; /* Read-modify-write: the other bits are preserved */
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 2000);
++ do {
++ mc_init_state_g.raw = mchbar_read32(MC_INIT_STATE_G);
++ if (mc_init_state_g.reset_io == 0) /* Bit reads back as 0: treated as completion */
++ return RAMINIT_STATUS_SUCCESS;
++
++ } while (!stopwatch_expired(&timer));
++ printk(BIOS_ERR, "Timed out waiting for DDR I/O reset to complete\n");
++ return RAMINIT_STATUS_POLL_TIMEOUT;
++}
++
+ enum raminit_status wait_for_first_rcomp(void)
+ {
+ struct stopwatch timer;
+diff --git a/src/northbridge/intel/haswell/native_raminit/jedec_reset.c b/src/northbridge/intel/haswell/native_raminit/jedec_reset.c
+new file mode 100644
+index 0000000000..de0f676758
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/jedec_reset.c
+@@ -0,0 +1,120 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <types.h>
++#include <timer.h>
++
++#include "raminit_native.h"
++
++static void assert_reset(const bool do_reset)
++{ /* Assert (true) or deassert (false) DRAM reset via the PCH on ULT, else via MC_INIT_STATE_G */
++ if (is_hsw_ult()) {
++ uint32_t pm_cfg2 = RCBA32(PM_CFG2);
++ if (do_reset)
++ pm_cfg2 &= ~PM_CFG2_DRAM_RESET_CTL; /* Bit cleared while reset is asserted */
++ else
++ pm_cfg2 |= PM_CFG2_DRAM_RESET_CTL;
++ RCBA32(PM_CFG2) = pm_cfg2;
++ } else {
++ union mc_init_state_g_reg mc_init_state_g = {
++ .raw = mchbar_read32(MC_INIT_STATE_G),
++ };
++ mc_init_state_g.ddr_not_reset = !do_reset; /* Field is the inverse of 'in reset' */
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
++ }
++}
++
++/*
++ * Perform JEDEC reset.
++ *
++ * If RTT_NOM is to be enabled in MR1, the ODT input signal must be
++ * statically held low in our system since RTT_NOM is always enabled.
++ */
++static void jedec_reset(struct sysinfo *ctrl)
++{ /* Pulse DRAM reset with CKE overridden, enable DCLK, then set CKE per channel rankmap */
++ if (is_hsw_ult())
++ assert_reset(false);
++
++ union mc_init_state_g_reg mc_init_state_g = {
++ .ddr_not_reset = 1,
++ .safe_self_refresh = 1,
++ };
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw); /* Full overwrite: all other bits become 0 */
++
++ union reut_misc_cke_ctrl_reg reut_misc_cke_ctrl = {
++ .cke_override = 0xf, /* Override enabled on all four bits */
++ .cke_on = 0, /* Overridden value is 0 for all of them */
++ };
++ mchbar_write32(REUT_MISC_CKE_CTRL, reut_misc_cke_ctrl.raw);
++
++ assert_reset(true);
++
++ /** TODO: check and switch DDR3 voltage here (mainboard-specific) **/
++
++ udelay(200); /* Reset stays asserted for 200 us */
++
++ assert_reset(false);
++
++ udelay(500); /* 500 us between reset release and enabling DCLK */
++
++ mc_init_state_g.dclk_enable = 1; /* Local copy still has ddr_not_reset = 1 */
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
++
++ /* Delay at least 20 nanoseconds for tCKSRX */
++ tick_delay(1);
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { /* No does_ch_exist() filter here */
++ reut_misc_cke_ctrl.cke_on = ctrl->rankmap[channel]; /* Override stays on; value follows rankmap */
++ mchbar_write32(REUT_ch_MISC_CKE_CTRL(channel), reut_misc_cke_ctrl.raw);
++ }
++
++ /*
++ * Wait minimum of reset CKE exit time, tXPR.
++ * Spec says MAX(tXS, 5 tCK). 5 tCK is 10 ns.
++ */
++ tick_delay(1);
++}
++
++enum raminit_status do_jedec_init(struct sysinfo *ctrl)
++{ /* Raminit task: I/O reset, wait for RCOMP, then JEDEC reset and DDR3 MR init with ODT forced */
++ /* Never do a JEDEC reset in S3 resume */
++ if (ctrl->bootmode == BOOTMODE_S3)
++ return RAMINIT_STATUS_SUCCESS;
++
++ enum raminit_status status = io_reset();
++ if (status) /* Any nonzero status is propagated as a failure */
++ return status;
++
++ status = wait_for_first_rcomp();
++ if (status)
++ return status;
++
++ /* Force ODT low (JEDEC spec) */
++ const union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
++ .odt_override = 0xf,
++ .odt_on = 0,
++ };
++ mchbar_write32(REUT_MISC_ODT_CTRL, reut_misc_odt_ctrl.raw);
++
++ /*
++ * Note: Haswell MRC does not clear ODT override for LPDDR3. However,
++ * Broadwell MRC does. Hell suspects this difference is important, as
++ * there is an erratum in the specification update for Broadwell:
++ *
++ * Erratum BDM74: LPDDR3 Memory Training May Cause Platform Boot Failure
++ */
++ if (ctrl->lpddr)
++ die("%s: LPDDR-specific JEDEC init not implemented\n", __func__);
++
++ jedec_reset(ctrl);
++ status = ddr3_jedec_init(ctrl);
++ if (!status)
++ ctrl->restore_mrs = true; /* Later MR programming reuses the values saved in ctrl->mrN[] */
++
++ /* Release ODT override */
++ mchbar_write32(REUT_MISC_ODT_CTRL, 0); /* Done on success and on failure alike */
++ return status;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 136a8ba989..73ff180b8c 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -25,6 +25,7 @@ static const struct task_entry cold_boot[] = {
+ { convert_timings, true, "CONVTIM", },
+ { configure_mc, true, "CONFMC", },
+ { configure_memory_map, true, "MEMMAP", },
++ { do_jedec_init, true, "JEDECINIT", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+@@ -58,6 +59,7 @@ static void initialize_ctrl(struct sysinfo *ctrl)
+ ctrl->stepping = get_stepping(ctrl->cpu);
+ ctrl->vdd_mv = is_hsw_ult() ? 1350 : 1500; /** FIXME: Hardcoded, does it matter? **/
+ ctrl->dq_pins_interleaved = cfg->dq_pins_interleaved;
++ ctrl->restore_mrs = false;
+ ctrl->bootmode = bootmode;
+ }
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 4763b25e8d..e3cf4254a0 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -27,6 +27,30 @@
+ /* Always use 12 legs for emphasis (not trained) */
+ #define TXEQFULLDRV (3 << 4)
+
++/* DDR3 mode register bits */
++#define MR0_DLL_RESET BIT(8)
++
++#define MR1_WL_ENABLE BIT(7)
++#define MR1_QOFF_ENABLE BIT(12) /* If set, output buffers disabled */
++
++#define RTTNOM_MASK (BIT(9) | BIT(6) | BIT(2))
++
++/* ZQ calibration types */
++enum {
++ ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
++ ZQ_LONG, /* DDR3: ZQCL with tZQoper, LPDDR3: ZQ Long with tZQCL */
++ ZQ_SHORT, /* DDR3: ZQCS with tZQCS, LPDDR3: ZQ Short with tZQCS */
++ ZQ_RESET, /* DDR3: not used, LPDDR3: ZQ Reset with tZQreset */
++};
++
++/* REUT initialisation modes */
++enum {
++ REUT_MODE_IDLE = 0,
++ REUT_MODE_TEST = 1,
++ REUT_MODE_MRS = 2,
++ REUT_MODE_NOP = 3, /* Normal operation mode */
++};
++
+ enum command_training_iteration {
+ CT_ITERATION_CLOCK = 0,
+ CT_ITERATION_CMD_NORTH,
+@@ -50,6 +74,7 @@ enum raminit_status {
+ RAMINIT_STATUS_UNSUPPORTED_MEMORY,
+ RAMINIT_STATUS_MPLL_INIT_FAILURE,
+ RAMINIT_STATUS_POLL_TIMEOUT,
++ RAMINIT_STATUS_REUT_ERROR,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -72,6 +97,7 @@ struct sysinfo {
+ uint32_t cpu; /* CPUID value */
+
+ bool dq_pins_interleaved;
++ bool restore_mrs;
+
+ /** TODO: ECC support untested **/
+ bool is_ecc;
+@@ -161,6 +187,11 @@ struct sysinfo {
+ union tc_bank_rank_b_reg tc_bankrank_b[NUM_CHANNELS];
+ union tc_bank_rank_c_reg tc_bankrank_c[NUM_CHANNELS];
+ union tc_bank_rank_d_reg tc_bankrank_d[NUM_CHANNELS];
++
++ uint16_t mr0[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint16_t mr1[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint16_t mr2[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint16_t mr3[NUM_CHANNELS][NUM_SLOTRANKS];
+ };
+
+ static inline bool is_hsw_ult(void)
+@@ -196,6 +227,55 @@ static inline void clear_data_offset_train_all(struct sysinfo *ctrl)
+ memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train));
+ }
+
++/* Number of ticks to wait in units of 69.841279 ns (citation needed) */
++static inline void tick_delay(const uint32_t delay)
++{ /* Busy-wait by issuing MCHBAR reads; the result of each read is discarded */
++ volatile uint32_t junk; /* volatile so the stores (and thus the reads) are not optimised out */
++
++ /* Just perform reads to a random register */
++ for (uint32_t start = 0; start <= delay; start++) /* Note: '<=' makes this delay + 1 reads */
++ junk = mchbar_read32(REUT_ERR_DATA_STATUS);
++}
++
++/*
++ * 64-bit MCHBAR registers need to be accessed atomically. If one uses
++ * two 32-bit ops instead, there will be problems with the REUT's CADB
++ * (Command Address Data Buffer): hardware automatically advances the
++ * pointer into the register file after a write to the input register.
++ */
++static inline uint64_t mchbar_read64(const uintptr_t x)
++{ /* Read the 64-bit MCHBAR register at offset x with a single MMX movq load */
++ const uint64_t *offset = (uint64_t *)(CONFIG_FIXED_MCHBAR_MMIO_BASE + x);
++ uint64_t mmxsave, v;
++ asm volatile (
++ "\n\t movq %%mm0, %0" /* Save MM0 into mmxsave */
++ "\n\t movq %2, %%mm0" /* 64-bit load from the register */
++ "\n\t movq %%mm0, %1" /* Copy the loaded value into v */
++ "\n\t movq %3, %%mm0" /* Restore MM0 from mmxsave */
++ "\n\t emms"
++ : "=m"(mmxsave),
++ "=m"(v)
++ : "m"(offset[0]),
++ "m"(mmxsave));
++ return v;
++}
++
++static inline void mchbar_write64(const uintptr_t x, const uint64_t v)
++{ /* Write v to the 64-bit MCHBAR register at offset x with a single MMX movq store */
++ const uint64_t *offset = (uint64_t *)(CONFIG_FIXED_MCHBAR_MMIO_BASE + x);
++ uint64_t mmxsave;
++ asm volatile (
++ "\n\t movq %%mm0, %0" /* Save MM0 into mmxsave */
++ "\n\t movq %2, %%mm0" /* Load v into MM0 */
++ "\n\t movq %%mm0, %1" /* 64-bit store to the register (operand 1 is offset[0]) */
++ "\n\t movq %3, %%mm0" /* Restore MM0 from mmxsave */
++ "\n\t emms"
++ : "=m"(mmxsave)
++ : "m"(offset[0]), /* NOTE(review): written by the asm yet declared as an input; confirm */
++ "m"(v),
++ "m"(mmxsave));
++}
++
+ void raminit_main(enum raminit_boot_mode bootmode);
+
+ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
+@@ -203,6 +283,7 @@ enum raminit_status initialise_mpll(struct sysinfo *ctrl);
+ enum raminit_status convert_timings(struct sysinfo *ctrl);
+ enum raminit_status configure_mc(struct sysinfo *ctrl);
+ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
++enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+@@ -215,8 +296,28 @@ uint32_t get_tXS_offset(uint32_t mem_clock_mhz);
+ uint32_t get_tZQOPER(uint32_t mem_clock_mhz, bool lpddr);
+ uint32_t get_tZQCS(uint32_t mem_clock_mhz, bool lpddr);
+
++enum raminit_status io_reset(void);
+ enum raminit_status wait_for_first_rcomp(void);
+
++uint16_t encode_ddr3_rttnom(uint32_t rttnom);
++void ddr3_program_mr1(struct sysinfo *ctrl, uint8_t wl_mode, uint8_t q_off);
++enum raminit_status ddr3_jedec_init(struct sysinfo *ctrl);
++
++void reut_issue_mrs(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ uint8_t rankmask,
++ uint8_t mr,
++ uint16_t val);
++
++void reut_issue_mrs_all(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ uint8_t mr,
++ const uint16_t val[NUM_SLOTS]);
++
++enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index 70487e1640..9929f617fe 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -335,6 +335,127 @@ union mcscheds_cbit_reg {
+ uint32_t raw;
+ };
+
++union reut_pat_cadb_prog_reg {
++ struct __packed {
++ uint32_t addr : 16; // Bits 15:0
++ uint32_t : 8; // Bits 23:16
++ uint32_t bank : 3; // Bits 26:24
++ uint32_t : 5; // Bits 31:27
++ uint32_t cs : 4; // Bits 35:32
++ uint32_t : 4; // Bits 39:36
++ uint32_t cmd : 3; // Bits 42:40
++ uint32_t : 5; // Bits 47:43
++ uint32_t odt : 4; // Bits 51:48
++ uint32_t : 4; // Bits 55:52
++ uint32_t cke : 4; // Bits 59:56
++ uint32_t : 4; // Bits 63:60
++ };
++ uint64_t raw;
++ uint32_t raw32[2];
++};
++
++union reut_pat_cadb_mrs_reg {
++ struct __packed {
++ uint32_t delay_gap : 3; // Bits 2:0
++ uint32_t : 5; // Bits 7:3
++ uint32_t start_ptr : 3; // Bits 10:8
++ uint32_t : 5; // Bits 15:11
++ uint32_t end_ptr : 3; // Bits 18:16
++ uint32_t : 5; // Bits 23:19
++ uint32_t curr_ptr : 3; // Bits 26:24
++ uint32_t : 5; // Bits 31:27
++ };
++ uint32_t raw;
++};
++
++union reut_seq_cfg_reg {
++ struct __packed {
++ uint32_t : 3; // Bits 2:0
++ uint32_t stop_base_seq_on_wrap_trigger : 1; // Bits 3:3
++ uint32_t : 1; // Bits 4:4
++ uint32_t address_update_rate_mode : 1; // Bits 5:5
++ uint32_t : 1; // Bits 6:6
++ uint32_t enable_dummy_reads : 1; // Bits 7:7
++ uint32_t : 2; // Bits 9:8
++ uint32_t enable_constant_write_strobe : 1; // Bits 10:10
++ uint32_t global_control : 1; // Bits 11:11
++ uint32_t initialization_mode : 2; // Bits 13:12
++ uint32_t : 2; // Bits 15:14
++ uint32_t early_steppings_loop_count : 5; // Bits 20:16 *** Not on C0 ***
++ uint32_t : 3; // Bits 23:21
++ uint32_t subsequence_start_pointer : 3; // Bits 26:24
++ uint32_t : 1; // Bits 27:27
++ uint32_t subsequence_end_pointer : 3; // Bits 30:28
++ uint32_t : 1; // Bits 31:31
++ uint32_t start_test_delay : 10; // Bits 41:32
++ uint32_t : 22; // Bits 63:42
++ };
++ uint64_t raw;
++ uint32_t raw32[2];
++};
++
++union reut_seq_ctl_reg {
++ struct __packed {
++ uint32_t start_test : 1; // Bits 0:0
++ uint32_t stop_test : 1; // Bits 1:1
++ uint32_t clear_errors : 1; // Bits 2:2
++ uint32_t : 1; // Bits 3:3
++ uint32_t stop_on_error : 1; // Bits 4:4
++ uint32_t : 27; // Bits 31:5
++ };
++ uint32_t raw;
++};
++
++union reut_global_err_reg {
++ struct __packed {
++ uint32_t ch_error : 2; // Bits 1:0
++ uint32_t : 14; // Bits 15:2
++ uint32_t ch_test_done : 2; // Bits 17:16
++ uint32_t : 14; // Bits 31:18
++ };
++ uint32_t raw;
++};
++
++union reut_misc_cke_ctrl_reg {
++ struct __packed {
++ uint32_t cke_override : 4; // Bits 3:0
++ uint32_t : 4; // Bits 7:4
++ uint32_t cke_en_start_test_sync : 1; // Bits 8:8
++ uint32_t : 7; // Bits 15:9
++ uint32_t cke_on : 4; // Bits 19:16
++ uint32_t : 12; // Bits 31:20
++ };
++ uint32_t raw;
++};
++
++union reut_misc_odt_ctrl_reg {
++ struct __packed {
++ uint32_t odt_override : 4; // Bits 3:0
++ uint32_t : 12; // Bits 15:4
++ uint32_t odt_on : 4; // Bits 19:16
++ uint32_t : 11; // Bits 30:20
++ uint32_t mpr_train_ddr_on : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union mcscheds_dft_misc_reg {
++ struct __packed {
++ uint32_t wdar : 1; // Bits 0:0
++ uint32_t safe_mask_sel : 3; // Bits 3:1
++ uint32_t force_rcv_en : 1; // Bits 4:4
++ uint32_t : 3; // Bits 7:5
++ uint32_t ddr_qualifier : 2; // Bits 9:8
++ uint32_t qualifier_length : 2; // Bits 11:10
++ uint32_t wdb_block_en : 1; // Bits 12:12
++ uint32_t rt_dft_read_ptr : 4; // Bits 16:13
++ uint32_t rt_dft_read_enable : 1; // Bits 17:17
++ uint32_t rt_dft_read_sel_addr : 1; // Bits 18:18
++ uint32_t : 13; // Bits 31:19
++ };
++ uint32_t raw;
++};
++
+ union tc_bank_reg {
+ struct __packed {
+ uint32_t tRCD : 5; // Bits 4:0
+@@ -428,6 +549,18 @@ union tc_srftp_reg {
+ uint32_t raw;
+ };
+
++union tc_mr2_shadow_reg {
++ struct __packed {
++ uint32_t mr2_shadow_low : 6; // Bits 5:0
++ uint32_t srt_available : 2; // Bits 7:6
++ uint32_t mr2_shadow_high : 3; // Bits 10:8
++ uint32_t : 3; // Bits 13:11
++ uint32_t addr_bit_swizzle : 2; // Bits 15:14
++ uint32_t : 16; // Bits 31:16
++ };
++ uint32_t raw;
++};
++
+ union mcmain_command_rate_limit_reg {
+ struct __packed {
+ uint32_t enable_cmd_limit : 1; // Bits 0:0
+@@ -483,6 +616,27 @@ union mad_zr_reg {
+ uint32_t raw;
+ };
+
++union mc_init_state_g_reg {
++ struct __packed {
++ uint32_t pu_mrc_done : 1; // Bits 0:0
++ uint32_t ddr_not_reset : 1; // Bits 1:1
++ uint32_t : 1; // Bits 2:2
++ uint32_t refresh_enable : 1; // Bits 3:3
++ uint32_t : 1; // Bits 4:4
++ uint32_t mc_init_done_ack : 1; // Bits 5:5
++ uint32_t : 1; // Bits 6:6
++ uint32_t mrc_done : 1; // Bits 7:7
++ uint32_t safe_self_refresh : 1; // Bits 8:8
++ uint32_t : 1; // Bits 9:9
++ uint32_t hvm_gate_ddr_reset : 1; // Bits 10:10
++ uint32_t : 11; // Bits 21:11
++ uint32_t dclk_enable : 1; // Bits 22:22
++ uint32_t reset_io : 1; // Bits 23:23
++ uint32_t : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
+ /* Same definition for P_COMP, M_COMP, D_COMP */
+ union pcu_comp_reg {
+ struct __packed {
+diff --git a/src/northbridge/intel/haswell/native_raminit/reut.c b/src/northbridge/intel/haswell/native_raminit/reut.c
+new file mode 100644
+index 0000000000..c55cdd9c7e
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/reut.c
+@@ -0,0 +1,196 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++enum {
++ CADB_CMD_MRS = 0,
++ CADB_CMD_REF = 1,
++ CADB_CMD_PRE = 2,
++ CADB_CMD_ACT = 3,
++ CADB_CMD_WR = 4,
++ CADB_CMD_RD = 5,
++ CADB_CMD_ZQ = 6,
++ CADB_CMD_NOP = 7,
++};
++
++/*
++ * DDR3 rank mirror swaps the following pins: A3<->A4, A5<->A6, A7<->A8, BA0<->BA1
++ *
++ * Note that the swapped bits are contiguous. We can use some XOR magic to swap the bits.
++ * Address lanes are at bits 0..15 and bank selects are at bits 24..26 on the REUT register.
++ */
++#define MIRROR_BITS (BIT(24) | BIT(7) | BIT(5) | BIT(3))
++static uint64_t cadb_prog_rank_mirror(const uint64_t cadb_prog)
++{ /* Swap bit pairs 3/4, 5/6, 7/8 and 24/25 of cadb_prog; all other bits pass through */
++ /* First XOR: find which pairs of bits are different (need swapping) */
++ const uint64_t tmp64 = (cadb_prog ^ (cadb_prog >> 1)) & MIRROR_BITS; /* Flag sits on the pair's low bit */
++
++ /* Second XOR: invert the pairs of bits that have different values */
++ return cadb_prog ^ (tmp64 | tmp64 << 1); /* Flipping both bits of a differing pair swaps them */
++}
++
++static enum raminit_status reut_write_cadb_cmd( /* Queue one CADB entry per selected rank and run them */
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rankmask, /* Ranks to target; ANDed with ctrl->rankmap[channel] */
++ const uint8_t cmd, /* One of the CADB_CMD_* values */
++ const uint8_t bank,
++ const uint16_t valarr[NUM_SLOTRANKS], /* Address field value, indexed by rank */
++ const uint8_t delay) /* Delay gap; 0 selects the default of 3 */
++{
++ union mcscheds_dft_misc_reg dft_misc = {
++ .raw = mchbar_read32(MCSCHEDS_DFT_MISC),
++ };
++ dft_misc.ddr_qualifier = 0;
++ mchbar_write32(MCSCHEDS_DFT_MISC, dft_misc.raw);
++
++ /* Pointer will be dynamically incremented after a write to CADB_PROG register */
++ mchbar_write8(REUT_ch_PAT_CADB_WRITE_PTR(channel), 0);
++
++ uint8_t count = 0; /* Number of CADB entries programmed below */
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!(ctrl->rankmap[channel] & BIT(rank) & rankmask)) /* Rank must be both present and requested */
++ continue;
++
++ union reut_pat_cadb_prog_reg reut_cadb_prog = {
++ .addr = valarr[rank],
++ .bank = bank,
++ .cs = ~BIT(rank), /* CS is active low */
++ .cmd = cmd,
++ .cke = 0xf,
++ };
++ if (ctrl->rank_mirrored[channel] & BIT(rank))
++ reut_cadb_prog.raw = cadb_prog_rank_mirror(reut_cadb_prog.raw);
++
++ mchbar_write64(REUT_ch_PAT_CADB_PROG(channel), reut_cadb_prog.raw); /* Single 64-bit write per entry */
++ count++;
++ }
++ if (!count) {
++ printk(BIOS_ERR, "%s: rankmask is invalid\n", __func__);
++ return RAMINIT_STATUS_UNSPECIFIED_ERROR; /** FIXME: Is this needed? **/
++ }
++ const union reut_pat_cadb_mrs_reg reut_cadb_mrs = {
++ .delay_gap = delay ? delay : 3,
++ .end_ptr = count - 1, /* Index of the last entry written above; start_ptr stays 0 */
++ };
++ mchbar_write32(REUT_ch_PAT_CADB_MRS(channel), reut_cadb_mrs.raw);
++
++ const uint32_t reut_seq_cfg_save = mchbar_read32(REUT_ch_SEQ_CFG(channel)); /* Low 32 bits only */
++ union reut_seq_cfg_reg reut_seq_cfg = {
++ .raw = reut_seq_cfg_save,
++ };
++ reut_seq_cfg.global_control = 0;
++ reut_seq_cfg.initialization_mode = REUT_MODE_MRS;
++ mchbar_write32(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw); /* 64-bit raw truncated to 32 bits */
++ mchbar_write32(REUT_ch_SEQ_CTL(channel), (union reut_seq_ctl_reg) {
++ .start_test = 1,
++ .clear_errors = 1,
++ }.raw);
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ union reut_global_err_reg reut_global_err;
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 100);
++ do { /* Poll until this channel reports test done, an error, or 100 ms elapse */
++ reut_global_err.raw = mchbar_read32(REUT_GLOBAL_ERR);
++ if (reut_global_err.ch_error & BIT(channel)) {
++ printk(BIOS_ERR, "Unexpected REUT error for channel %u\n", channel);
++ status = RAMINIT_STATUS_REUT_ERROR;
++ break;
++ }
++ if (stopwatch_expired(&timer)) {
++ printk(BIOS_ERR, "%s: REUT timed out!\n", __func__);
++ status = RAMINIT_STATUS_POLL_TIMEOUT;
++ break;
++ }
++ } while (!(reut_global_err.ch_test_done & BIT(channel)));
++ mchbar_write32(REUT_ch_SEQ_CTL(channel), (union reut_seq_ctl_reg) {
++ .clear_errors = 1,
++ }.raw);
++ mchbar_write32(REUT_ch_SEQ_CFG(channel), reut_seq_cfg_save); /* Restored whatever the status is */
++ return status;
++}
++
++static enum raminit_status reut_write_cadb_cmd_all( /* Same as reut_write_cadb_cmd, one value for all ranks */
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rankmask,
++ const uint8_t cmd,
++ const uint8_t bank,
++ const uint16_t val,
++ const uint8_t delay)
++{
++ const uint16_t valarr[NUM_SLOTRANKS] = { val, val, val, val }; /* Replicate val for each rank index */
++ return reut_write_cadb_cmd(ctrl, channel, rankmask, cmd, bank, valarr, delay);
++}
++
++void reut_issue_mrs( /* Issue an MRS command writing val to mode register mr on the selected ranks */
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rankmask,
++ const uint8_t mr, /* Passed as the bank field of the CADB entry */
++ const uint16_t val)
++{
++ reut_write_cadb_cmd_all(ctrl, channel, rankmask, CADB_CMD_MRS, mr, val, 0); /* Status is discarded */
++}
++
++void reut_issue_mrs_all( /* Issue an MRS command to all ranks of a channel using per-slot values */
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t mr, /* Passed as the bank field of the CADB entry */
++ const uint16_t val[NUM_SLOTS])
++{
++ const uint16_t valarr[NUM_SLOTRANKS] = { val[0], val[0], val[1], val[1] }; /* Ranks 0-1 use val[0], 2-3 val[1] */
++ reut_write_cadb_cmd(ctrl, channel, 0xf, CADB_CMD_MRS, mr, valarr, 0); /* Status is discarded */
++}
++
++enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type)
++{ /* Issue a ZQ command of type zq_type (ZQ_*) on every existing channel selected in chanmask */
++ /** TODO: Issuing ZQ commands differs for LPDDR **/
++ if (ctrl->lpddr)
++ die("%s: LPDDR not yet supported in ZQ calibration\n", __func__);
++
++ uint8_t opcode; /* NOTE: Only used for LPDDR */
++ uint16_t zq = 0; /* Address field of the command; stays 0 for ZQ_SHORT and ZQ_RESET */
++ switch (zq_type) {
++ case ZQ_INIT:
++ zq = BIT(10);
++ opcode = 0xff;
++ break;
++ case ZQ_LONG:
++ zq = BIT(10);
++ opcode = 0xab;
++ break;
++ case ZQ_SHORT:
++ opcode = 0x56;
++ break;
++ case ZQ_RESET:
++ opcode = 0xc3;
++ break;
++ default:
++ die("%s: ZQ type %u is invalid\n", __func__, zq_type);
++ }
++
++ /* ZQCS on single-channel needs a longer delay */
++ const uint8_t delay = zq_type == ZQ_SHORT && (!ctrl->dpc[0] || !ctrl->dpc[1]) ? 7 : 1;
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!(BIT(channel) & chanmask) || !does_ch_exist(ctrl, channel))
++ continue;
++
++ status = reut_write_cadb_cmd_all(ctrl, channel, 0xf, CADB_CMD_ZQ, 0, zq, delay);
++ if (status) /* Stop at the first failing channel and report its status */
++ break;
++ }
++
++ /* Wait a bit after ZQ INIT and ZQCL commands */
++ if (zq)
++ udelay(1);
++
++ return status;
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 2acc5cbbc8..4fc78a7f43 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -96,15 +96,36 @@
+
+ #define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
+
++#define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
++
++#define REUT_ch_PAT_CADB_MRS(ch) _MCMAIN_C(0x419c, ch)
++
++#define REUT_ch_PAT_CADB_WRITE_PTR(ch) _MCMAIN_C(0x41bc, ch)
++#define REUT_ch_PAT_CADB_PROG(ch) _MCMAIN_C(0x41c0, ch)
++
+ #define TC_ZQCAL_ch(ch) _MCMAIN_C(0x4290, ch)
+ #define TC_RFP_ch(ch) _MCMAIN_C(0x4294, ch)
+ #define TC_RFTP_ch(ch) _MCMAIN_C(0x4298, ch)
++#define TC_MR2_SHADOW_ch(ch) _MCMAIN_C(0x429c, ch)
+ #define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
+ #define TC_SRFTP_ch(ch) _MCMAIN_C(0x42a4, ch)
+
++#define REUT_GLOBAL_ERR 0x4804
++
++#define REUT_ch_SEQ_CFG(ch) (0x48a8 + 8 * (ch))
++
++#define REUT_ch_SEQ_CTL(ch) (0x48b8 + 4 * (ch))
++
+ /* MCMAIN broadcast */
+ #define MCSCHEDS_CBIT 0x4c20
+
++#define MCSCHEDS_DFT_MISC 0x4c30
++
++#define REUT_ERR_DATA_STATUS 0x4ce0
++
++#define REUT_MISC_CKE_CTRL 0x4d90
++#define REUT_MISC_ODT_CTRL 0x4d94
++
+ #define MCMNTS_SC_WDBWM 0x4f8c
+
+ /* MCDECS */
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index 74b4d50017..16bef5032a 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -586,6 +586,8 @@ void mainboard_config_rcba(void);
+ #define ACPIIRQEN 0x31e0 /* 32bit */
+ #define OIC 0x31fe /* 16bit */
+ #define PRSTS 0x3310 /* 32bit */
++#define PM_CFG2 0x333c /* 32bit */
++#define PM_CFG2_DRAM_RESET_CTL (1 << 26) /* ULT only */
+ #define PMSYNC_CONFIG 0x33c4 /* 32bit */
+ #define PMSYNC_CONFIG2 0x33cc /* 32bit */
+ #define SOFT_RESET_CTRL 0x38f4
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch b/resources/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch
new file mode 100644
index 00000000..e4cea123
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch
@@ -0,0 +1,384 @@
+From 42e43eb210bbb172af8e5ad064326c4570be8654 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 23:12:18 +0200
+Subject: [PATCH 17/26] haswell NRI: Add pre-training steps
+
+Implement pre-training steps, which consist of enabling ECC I/O and
+filling the WDB (Write Data Buffer, stores test patterns) through a
+magic LDAT port.
+
+Change-Id: Ie2e09e3b218c4569ed8de5c5e1b05d491032e0f1
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 34 ++++
+ .../haswell/native_raminit/raminit_native.h | 24 +++
+ .../haswell/native_raminit/reg_structs.h | 45 +++++
+ .../intel/haswell/native_raminit/setup_wdb.c | 159 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 9 +
+ 6 files changed, 272 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/setup_wdb.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index e9212df9e6..8d7d4e4db0 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -10,5 +10,6 @@ romstage-y += memory_map.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+ romstage-y += reut.c
++romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += timings_refresh.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 73ff180b8c..5e4674957d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -13,6 +13,39 @@
+
+ #include "raminit_native.h"
+
++static enum raminit_status pre_training(struct sysinfo *ctrl)
++{
++ /* Skip on S3 resume */
++ if (ctrl->bootmode == BOOTMODE_S3)
++ return RAMINIT_STATUS_SUCCESS;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel))
++ continue;
++
++ printk(RAM_DEBUG, "C%uS%u:\n", channel, slot);
++ printk(RAM_DEBUG, "\tMR0: 0x%04x\n", ctrl->mr0[channel][slot]);
++ printk(RAM_DEBUG, "\tMR1: 0x%04x\n", ctrl->mr1[channel][slot]);
++ printk(RAM_DEBUG, "\tMR2: 0x%04x\n", ctrl->mr2[channel][slot]);
++ printk(RAM_DEBUG, "\tMR3: 0x%04x\n", ctrl->mr3[channel][slot]);
++ printk(RAM_DEBUG, "\n");
++ }
++ if (ctrl->is_ecc) {
++ union mad_dimm_reg mad_dimm = {
++ .raw = mchbar_read32(MAD_DIMM(channel)),
++ };
++ /* Enable ECC I/O */
++ mad_dimm.ecc_mode = 1;
++ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
++ /* Wait 4 usec after enabling the ECC I/O, needed by HW */
++ udelay(4);
++ }
++ }
++ setup_wdb(ctrl);
++ return RAMINIT_STATUS_SUCCESS;
++}
++
+ struct task_entry {
+ enum raminit_status (*task)(struct sysinfo *);
+ bool is_enabled;
+@@ -26,6 +59,7 @@ static const struct task_entry cold_boot[] = {
+ { configure_mc, true, "CONFMC", },
+ { configure_memory_map, true, "MEMMAP", },
+ { do_jedec_init, true, "JEDECINIT", },
++ { pre_training, true, "PRETRAIN", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index e3cf4254a0..f29c2ec366 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -35,6 +35,13 @@
+
+ #define RTTNOM_MASK (BIT(9) | BIT(6) | BIT(2))
+
++#define BASIC_VA_PAT_SPREAD_8 0x01010101
++
++#define WDB_CACHE_LINE_SIZE 8
++
++#define NUM_WDB_CL_MUX_SEEDS 3
++#define NUM_CADB_MUX_SEEDS 3
++
+ /* ZQ calibration types */
+ enum {
+ ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
+@@ -318,6 +325,23 @@ void reut_issue_mrs_all(
+
+ enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type);
+
++void write_wdb_fixed_pat(
++ const struct sysinfo *ctrl,
++ const uint8_t patterns[],
++ const uint8_t pat_mask[],
++ uint8_t spread,
++ uint16_t start);
++
++void write_wdb_va_pat(
++ const struct sysinfo *ctrl,
++ uint32_t agg_mask,
++ uint32_t vic_mask,
++ uint8_t vic_rot,
++ uint16_t start);
++
++void program_wdb_lfsr(const struct sysinfo *ctrl, bool cleanup);
++void setup_wdb(const struct sysinfo *ctrl);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index 9929f617fe..7aa8d8c8b2 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -335,6 +335,18 @@ union mcscheds_cbit_reg {
+ uint32_t raw;
+ };
+
++union reut_pat_cl_mux_lmn_reg {
++ struct __packed {
++ uint32_t l_data_select : 1; // Bits 0:0
++ uint32_t en_sweep_freq : 1; // Bits 1:1
++ uint32_t : 6; // Bits 7:2
++ uint32_t l_counter : 8; // Bits 15:8
++ uint32_t m_counter : 8; // Bits 23:16
++ uint32_t n_counter : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
+ union reut_pat_cadb_prog_reg {
+ struct __packed {
+ uint32_t addr : 16; // Bits 15:0
+@@ -439,6 +451,39 @@ union reut_misc_odt_ctrl_reg {
+ uint32_t raw;
+ };
+
++union ldat_pdat_reg {
++ struct __packed {
++ uint32_t fast_addr : 12; // Bits 11:0
++ uint32_t : 4; // Bits 15:12
++ uint32_t addr_en : 1; // Bits 16:16
++ uint32_t seq_en : 1; // Bits 17:17
++ uint32_t pol_0 : 1; // Bits 18:18
++ uint32_t pol_1 : 1; // Bits 19:19
++ uint32_t cmd_a : 4; // Bits 23:20
++ uint32_t cmd_b : 4; // Bits 27:24
++ uint32_t cmd_c : 4; // Bits 31:28
++ };
++ uint32_t raw;
++};
++
++union ldat_sdat_reg {
++ struct __packed {
++ uint32_t bank_sel : 4; // Bits 3:0
++ uint32_t : 1; // Bits 4:4
++ uint32_t array_sel : 5; // Bits 9:5
++ uint32_t cmp : 1; // Bits 10:10
++ uint32_t replicate : 1; // Bits 11:11
++ uint32_t dword : 4; // Bits 15:12
++ uint32_t mode : 2; // Bits 17:16
++ uint32_t mpmap : 6; // Bits 23:18
++ uint32_t mpb_offset : 4; // Bits 27:24
++ uint32_t stage_en : 1; // Bits 28:28
++ uint32_t shadow : 2; // Bits 30:29
++ uint32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
+ union mcscheds_dft_misc_reg {
+ struct __packed {
+ uint32_t wdar : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/native_raminit/setup_wdb.c b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
+new file mode 100644
+index 0000000000..ec37c48415
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
+@@ -0,0 +1,159 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static void ldat_write_cacheline(
++ const struct sysinfo *const ctrl,
++ const uint8_t chunk,
++ const uint16_t start,
++ const uint64_t data)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ /*
++ * Do not do a 64-bit write here. The register is not aligned
++ * to a 64-bit boundary, which could potentially cause issues.
++ */
++ mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 0), data & UINT32_MAX);
++ mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 1), data >> 32);
++ /*
++ * Set REPLICATE = 0 as you don't want to replicate the data.
++ * Set BANK_SEL to the chunk you want to write the 64 bits to.
++ * Set ARRAY_SEL = 0 (the MC WDB) and MODE = 1.
++ */
++ const union ldat_sdat_reg ldat_sdat = {
++ .bank_sel = chunk,
++ .mode = 1,
++ };
++ mchbar_write32(QCLK_ch_LDAT_SDAT(channel), ldat_sdat.raw);
++ /*
++ * Finally, write the PDAT register indicating which cacheline
++ * of the WDB you want to write to by setting FAST_ADDR field
++ * to one of the 64 cache lines. Also set CMD_B in the PDAT
++ * register to 4'b1000, indicating that this is a LDAT write.
++ */
++ const union ldat_pdat_reg ldat_pdat = {
++ .fast_addr = MIN(start, 0xfff),
++ .cmd_b = 8,
++ };
++ mchbar_write32(QCLK_ch_LDAT_PDAT(channel), ldat_pdat.raw);
++ }
++}
++
++static void clear_ldat_mode(const struct sysinfo *const ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
++ mchbar_write32(QCLK_ch_LDAT_SDAT(channel), 0);
++}
++
++void write_wdb_fixed_pat(
++ const struct sysinfo *const ctrl,
++ const uint8_t patterns[],
++ const uint8_t pat_mask[],
++ const uint8_t spread,
++ const uint16_t start)
++{
++ for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
++ uint64_t data = 0;
++ for (uint8_t b = 0; b < 64; b++) {
++ const uint8_t beff = b % spread;
++ const uint8_t burst = patterns[pat_mask[beff]];
++ if (burst & BIT(chunk))
++ data |= 1ULL << b;
++ }
++ ldat_write_cacheline(ctrl, chunk, start, data);
++ }
++ clear_ldat_mode(ctrl);
++}
++
++static inline uint32_t rol_u32(const uint32_t val)
++{
++ return (val << 1) | ((val >> 31) & 1);
++}
++
++void write_wdb_va_pat(
++ const struct sysinfo *const ctrl,
++ const uint32_t agg_mask,
++ const uint32_t vic_mask,
++ const uint8_t vic_rot,
++ const uint16_t start)
++{
++ static const uint8_t va_mask_to_compressed[4] = {0xaa, 0xc0, 0xcc, 0xf0};
++ uint32_t v_mask = vic_mask;
++ uint32_t a_mask = agg_mask;
++ for (uint8_t v = 0; v < vic_rot; v++) {
++ uint8_t compressed[32] = {0};
++ /* Iterate through all 32 bits and create a compressed version of cacheline */
++ for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++) {
++ const uint8_t vic = !!(v_mask & BIT(b));
++ const uint8_t agg = !!(a_mask & BIT(b));
++ const uint8_t index = !vic << 1 | agg << 0;
++ compressed[b] = va_mask_to_compressed[index];
++ }
++ for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
++ uint32_t data = 0;
++ for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++)
++ data |= !!(compressed[b] & BIT(chunk)) << b;
++
++ const uint64_t data64 = (uint64_t)data << 32 | data;
++ ldat_write_cacheline(ctrl, chunk, start + v, data64);
++ }
++ v_mask = rol_u32(v_mask);
++ a_mask = rol_u32(a_mask);
++ }
++ clear_ldat_mode(ctrl);
++}
++
++void program_wdb_lfsr(const struct sysinfo *ctrl, const bool cleanup)
++{
++ /* Cleanup LFSR seeds are sequential */
++ const uint32_t cleanup_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xaaaaaa, 0xcccccc, 0xf0f0f0 };
++ const uint32_t regular_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xa10ca1, 0xef0d08, 0xad0a1e };
++ const uint32_t *seeds = cleanup ? cleanup_seeds : regular_seeds;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t i = 0; i < NUM_WDB_CL_MUX_SEEDS; i++) {
++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_RD_x(channel, i), seeds[i]);
++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_WR_x(channel, i), seeds[i]);
++ }
++ }
++}
++
++void setup_wdb(const struct sysinfo *ctrl)
++{
++ const uint32_t amask[9] = {
++ 0x86186186, 0x18618618, 0x30c30c30,
++ 0xa28a28a2, 0x8a28a28a, 0x14514514,
++ 0x28a28a28, 0x92492492, 0x24924924,
++ };
++ const uint32_t vmask = 0x41041041;
++
++ /* Fill first 8 entries with simple 2-LFSR VA pattern */
++ write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0);
++
++ /* Fill next 54 entries with 3-LFSR VA pattern */
++ for (uint8_t a = 0; a < ARRAY_SIZE(amask); a++)
++ write_wdb_va_pat(ctrl, amask[a], vmask, 6, 8 + a * 6);
++
++ program_wdb_lfsr(ctrl, false);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ const union reut_pat_cl_mux_lmn_reg wdb_cl_mux_lmn = {
++ .en_sweep_freq = 1,
++ .l_counter = 1,
++ .m_counter = 1,
++ .n_counter = 10,
++ };
++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_LMN(channel), wdb_cl_mux_lmn.raw);
++ }
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 4fc78a7f43..f8408e51a0 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -94,6 +94,11 @@
+ #define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
+ #define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+
++#define REUT_ch_PAT_WDB_CL_MUX_WR_x(ch, x) _MCMAIN_C_X(0x4048, ch, x) /* x in 0 .. 2 */
++#define REUT_ch_PAT_WDB_CL_MUX_RD_x(ch, x) _MCMAIN_C_X(0x4054, ch, x) /* x in 0 .. 2 */
++
++#define REUT_ch_PAT_WDB_CL_MUX_LMN(ch) _MCMAIN_C(0x4078, ch)
++
+ #define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
+
+ #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
+@@ -110,6 +115,10 @@
+ #define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
+ #define TC_SRFTP_ch(ch) _MCMAIN_C(0x42a4, ch)
+
++#define QCLK_ch_LDAT_PDAT(ch) _MCMAIN_C(0x42d0, ch)
++#define QCLK_ch_LDAT_SDAT(ch) _MCMAIN_C(0x42d4, ch)
++#define QCLK_ch_LDAT_DATA_IN_x(ch, x) _MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */
++
+ #define REUT_GLOBAL_ERR 0x4804
+
+ #define REUT_ch_SEQ_CFG(ch) (0x48a8 + 8 * (ch))
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch b/resources/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch
new file mode 100644
index 00000000..5df22ed3
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch
@@ -0,0 +1,1128 @@
+From f4dd460d609276de7cb7db91f145a404451a2301 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 00:11:29 +0200
+Subject: [PATCH 18/26] haswell NRI: Add REUT I/O test library
+
+Implement a library to run I/O tests using the REUT hardware.
+
+Change-Id: Id7b207cd0a3989ddd23c88c6b1f0cfa79d2c861f
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_native.h | 110 +++
+ .../haswell/native_raminit/reg_structs.h | 121 +++
+ .../intel/haswell/native_raminit/testing_io.c | 742 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 30 +
+ 5 files changed, 1004 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/testing_io.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 8d7d4e4db0..6e1b365602 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -12,4 +12,5 @@ romstage-y += raminit_native.c
+ romstage-y += reut.c
+ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
++romstage-y += testing_io.c
+ romstage-y += timings_refresh.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index f29c2ec366..56df36ca8d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -58,6 +58,88 @@ enum {
+ REUT_MODE_NOP = 3, /* Normal operation mode */
+ };
+
++/* REUT error counter control */
++enum {
++ COUNT_ERRORS_PER_CHANNEL = 0,
++ COUNT_ERRORS_PER_LANE = 1,
++ COUNT_ERRORS_PER_BYTE_GROUP = 2,
++ COUNT_ERRORS_PER_CHUNK = 3,
++};
++
++enum wdb_dq_pattern {
++ BASIC_VA = 0,
++ SEGMENT_WDB,
++ CADB,
++ TURN_AROUND,
++ LMN_VA,
++ TURN_AROUND_WR,
++ TURN_AROUND_ODT,
++ RD_RD_TA,
++ RD_RD_TA_ALL,
++};
++
++enum reut_cmd_pat {
++ PAT_WR_RD,
++ PAT_WR,
++ PAT_RD,
++ PAT_RD_WR_TA,
++ PAT_WR_RD_TA,
++ PAT_ODT_TA,
++};
++
++/* REUT subsequence types (B = Base, O = Offset) */
++enum {
++ SUBSEQ_B_RD = 0 << 22,
++ SUBSEQ_B_WR = 1 << 22,
++ SUBSEQ_B_RD_WR = 2 << 22,
++ SUBSEQ_B_WR_RD = 3 << 22,
++ SUBSEQ_O_RD = 4 << 22,
++ SUBSEQ_O_WR = 5 << 22,
++};
++
++/* REUT mux control */
++enum {
++ REUT_MUX_LMN = 0,
++ REUT_MUX_BTBUFFER = 1,
++ REUT_MUX_LFSR = 2,
++};
++
++/* Increment scale */
++enum {
++ SCALE_LOGARITHM = 0,
++ SCALE_LINEAR = 1,
++};
++
++enum test_stop {
++ NSOE = 0, /* Never stop on error */
++ NTHSOE = 1, /* Stop on the nth error (we use n = 1) */
++ ABGSOE = 2, /* Stop on all byte groups error */
++ ALSOE = 3, /* Stop on all lanes error */
++};
++
++struct wdb_pat {
++ uint32_t start_ptr; /* Starting pointer in WDB */
++ uint32_t stop_ptr; /* Stopping pointer in WDB */
++ uint16_t inc_rate; /* How quickly the WDB walks through cachelines */
++ uint8_t dq_pattern; /* DQ pattern to use (see enum wdb_dq_pattern above) */
++};
++
++struct reut_pole {
++ uint16_t start;
++ uint16_t stop;
++ uint16_t order;
++ uint32_t inc_rate;
++ uint16_t inc_val;
++ bool wrap_trigger;
++};
++
++struct reut_box {
++ struct reut_pole rank;
++ struct reut_pole bank;
++ struct reut_pole row;
++ struct reut_pole col;
++};
++
+ enum command_training_iteration {
+ CT_ITERATION_CLOCK = 0,
+ CT_ITERATION_CMD_NORTH,
+@@ -199,6 +281,10 @@ struct sysinfo {
+ uint16_t mr1[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint16_t mr2[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint16_t mr3[NUM_CHANNELS][NUM_SLOTRANKS];
++
++ uint8_t dq_pat;
++
++ uint8_t dq_pat_lc;
+ };
+
+ static inline bool is_hsw_ult(void)
+@@ -342,6 +428,30 @@ void write_wdb_va_pat(
+ void program_wdb_lfsr(const struct sysinfo *ctrl, bool cleanup);
+ void setup_wdb(const struct sysinfo *ctrl);
+
++void program_seq_addr(uint8_t channel, const struct reut_box *reut_addr, bool log_seq_addr);
++void program_loop_count(const struct sysinfo *ctrl, uint8_t channel, uint8_t lc_exp);
++
++void setup_io_test(
++ struct sysinfo *ctrl,
++ uint8_t chanmask,
++ enum reut_cmd_pat cmd_pat,
++ uint16_t num_cl,
++ uint8_t lc,
++ const struct reut_box *reut_addr,
++ enum test_stop soe,
++ const struct wdb_pat *pat,
++ uint8_t en_cadb,
++ uint8_t subseq_wait);
++
++void setup_io_test_cadb(struct sysinfo *ctrl, uint8_t chanmask, uint8_t lc, enum test_stop soe);
++void setup_io_test_basic_va(struct sysinfo *ctrl, uint8_t chm, uint8_t lc, enum test_stop soe);
++void setup_io_test_mpr(struct sysinfo *ctrl, uint8_t chanmask, uint8_t lc, enum test_stop soe);
++
++uint8_t select_reut_ranks(struct sysinfo *ctrl, uint8_t channel, uint8_t rankmask);
++
++void run_mpr_io_test(bool clear_errors);
++uint8_t run_io_test(struct sysinfo *ctrl, uint8_t chanmask, uint8_t dq_pat, bool clear_errors);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index 7aa8d8c8b2..b943259b91 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -347,6 +347,54 @@ union reut_pat_cl_mux_lmn_reg {
+ uint32_t raw;
+ };
+
++union reut_err_ctl_reg {
++ struct __packed {
++ uint32_t stop_on_nth_error : 6; // Bits 5:0
++ uint32_t : 6; // Bits 11:6
++ uint32_t stop_on_error_control : 2; // Bits 13:12
++ uint32_t : 2; // Bits 15:14
++ uint32_t selective_err_enable_chunk : 8; // Bits 23:16
++ uint32_t selective_err_enable_cacheline : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
++union reut_pat_cadb_mux_ctrl_reg {
++ struct __packed {
++ uint32_t mux_0_ctrl : 2; // Bits 1:0
++ uint32_t : 2; // Bits 3:2
++ uint32_t mux_1_ctrl : 2; // Bits 5:4
++ uint32_t : 2; // Bits 7:6
++ uint32_t mux_2_ctrl : 2; // Bits 9:8
++ uint32_t : 6; // Bits 15:10
++ uint32_t sel_mux_0_ctrl : 2; // Bits 17:16
++ uint32_t : 2; // Bits 19:18
++ uint32_t sel_mux_1_ctrl : 2; // Bits 21:20
++ uint32_t : 2; // Bits 23:22
++ uint32_t sel_mux_2_ctrl : 2; // Bits 25:24
++ uint32_t : 6; // Bits 31:26
++ };
++ uint32_t raw;
++};
++
++union reut_pat_wdb_cl_mux_cfg_reg {
++ struct __packed {
++ uint32_t mux_0_control : 2; // Bits 1:0
++ uint32_t : 1; // Bits 2:2
++ uint32_t mux_1_control : 2; // Bits 4:3
++ uint32_t : 1; // Bits 5:5
++ uint32_t mux_2_control : 2; // Bits 7:6
++ uint32_t : 6; // Bits 13:8
++ uint32_t ecc_replace_byte_ctl : 1; // Bits 14:14
++ uint32_t ecc_data_source_sel : 1; // Bits 15:15
++ uint32_t save_lfsr_seed_rate : 6; // Bits 21:16
++ uint32_t : 2; // Bits 23:22
++ uint32_t reload_lfsr_seed_rate : 3; // Bits 26:24
++ uint32_t : 5; // Bits 31:27
++ };
++ uint32_t raw;
++};
++
+ union reut_pat_cadb_prog_reg {
+ struct __packed {
+ uint32_t addr : 16; // Bits 15:0
+@@ -366,6 +414,19 @@ union reut_pat_cadb_prog_reg {
+ uint32_t raw32[2];
+ };
+
++union reut_pat_wdb_cl_ctrl_reg {
++ struct __packed {
++ uint32_t inc_rate : 5; // Bits 4:0
++ uint32_t inc_scale : 1; // Bits 5:5
++ uint32_t : 2; // Bits 7:6
++ uint32_t start_ptr : 6; // Bits 13:8
++ uint32_t : 2; // Bits 15:14
++ uint32_t end_ptr : 6; // Bits 21:16
++ uint32_t : 10; // Bits 31:22
++ };
++ uint32_t raw;
++};
++
+ union reut_pat_cadb_mrs_reg {
+ struct __packed {
+ uint32_t delay_gap : 3; // Bits 2:0
+@@ -406,6 +467,66 @@ union reut_seq_cfg_reg {
+ uint32_t raw32[2];
+ };
+
++union reut_seq_base_addr_reg {
++ struct __packed {
++ uint32_t : 3; // Bits 2:0
++ uint32_t col_addr : 8; // Bits 10:3
++ uint32_t : 13; // Bits 23:11
++ uint32_t row_addr : 16; // Bits 39:24
++ uint32_t : 8; // Bits 47:40
++ uint32_t bank_addr : 3; // Bits 50:48
++ uint32_t : 5; // Bits 55:51
++ uint32_t rank_addr : 3; // Bits 58:56
++ uint32_t : 5; // Bits 63:59
++ };
++ uint32_t raw32[2];
++ uint64_t raw;
++};
++
++union reut_seq_misc_ctl_reg {
++ struct __packed {
++ uint32_t col_addr_order : 2; // Bits 1:0
++ uint32_t row_addr_order : 2; // Bits 3:2
++ uint32_t bank_addr_order : 2; // Bits 5:4
++ uint32_t rank_addr_order : 2; // Bits 7:6
++ uint32_t : 5; // Bits 12:8
++ uint32_t addr_invert_rate : 3; // Bits 15:13
++ uint32_t : 4; // Bits 19:16
++ uint32_t col_addr_invert_en : 1; // Bits 20:20
++ uint32_t row_addr_invert_en : 1; // Bits 21:21
++ uint32_t bank_addr_invert_en : 1; // Bits 22:22
++ uint32_t rank_addr_invert_en : 1; // Bits 23:23
++ uint32_t col_wrap_trigger_en : 1; // Bits 24:24
++ uint32_t row_wrap_trigger_en : 1; // Bits 25:25
++ uint32_t bank_wrap_trigger_en : 1; // Bits 26:26
++ uint32_t rank_wrap_trigger_en : 1; // Bits 27:27
++ uint32_t col_wrap_carry_en : 1; // Bits 28:28
++ uint32_t row_wrap_carry_en : 1; // Bits 29:29
++ uint32_t bank_wrap_carry_en : 1; // Bits 30:30
++ uint32_t rank_wrap_carry_en : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union reut_seq_addr_inc_ctl_reg {
++ struct __packed {
++ uint32_t : 3; // Bits 2:0
++ uint32_t col_addr_increment : 8; // Bits 10:3
++ uint32_t : 1; // Bits 11:11
++ uint32_t col_addr_update : 8; // Bits 19:12
++ uint32_t row_addr_increment : 12; // Bits 31:20
++ uint32_t row_addr_update : 6; // Bits 37:32
++ uint32_t bank_addr_increment : 3; // Bits 40:38
++ uint32_t : 3; // Bits 43:41
++ uint32_t bank_addr_update : 8; // Bits 51:44
++ uint32_t rank_addr_increment : 3; // Bits 54:52
++ uint32_t : 1; // Bits 55:55
++ uint32_t rank_addr_update : 8; // Bits 63:56
++ };
++ uint64_t raw;
++ uint32_t raw32[2];
++};
++
+ union reut_seq_ctl_reg {
+ struct __packed {
+ uint32_t start_test : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/native_raminit/testing_io.c b/src/northbridge/intel/haswell/native_raminit/testing_io.c
+new file mode 100644
+index 0000000000..7716fc4285
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/testing_io.c
+@@ -0,0 +1,742 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <lib.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static void set_cadb_patterns(const uint8_t channel, const uint16_t seeds[NUM_CADB_MUX_SEEDS])
++{
++ for (uint8_t i = 0; i < NUM_CADB_MUX_SEEDS; i++)
++ mchbar_write32(REUT_ch_PAT_CADB_MUX_x(channel, i), seeds[i]);
++}
++
++static void setup_cadb(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t vic_spread,
++ const uint8_t vic_bit)
++{
++ const bool lmn_en = false;
++
++ /*
++ * Currently, always start writing at CADB row 0.
++ * Could add a start point parameter in the future.
++ */
++ mchbar_write8(REUT_ch_PAT_CADB_WRITE_PTR(channel), 0);
++ const uint8_t num_cadb_rows = 8;
++ for (uint8_t row = 0; row < num_cadb_rows; row++) {
++ const uint8_t lfsr0 = (row >> 0) & 1;
++ const uint8_t lfsr1 = (row >> 1) & 1;
++ uint64_t reg64 = 0;
++ for (uint8_t bit = 0; bit < 22; bit++) {
++ uint8_t bremap;
++ if (bit >= 19) {
++ /* (bremap in 40 .. 42) => CADB data control */
++ bremap = bit + 21;
++ } else if (bit >= 16) {
++ /* (bremap in 24 .. 26) => CADB data bank */
++ bremap = bit + 8;
++ } else {
++ /* (bremap in 0 .. 15) => CADB data address */
++ bremap = bit;
++ }
++ const uint8_t fine = bit % vic_spread;
++ reg64 |= ((uint64_t)(fine == vic_bit ? lfsr0 : lfsr1)) << bremap;
++ }
++ /*
++ * Write row. CADB pointer is auto incremented after every write. This must be
++ * a single 64-bit write, otherwise the CADB pointer will auto-increment twice.
++ */
++ mchbar_write64(REUT_ch_PAT_CADB_PROG(channel), reg64);
++ }
++ const union reut_pat_cadb_mux_ctrl_reg cadb_mux_ctrl = {
++ .mux_0_ctrl = lmn_en ? REUT_MUX_LMN : REUT_MUX_LFSR,
++ .mux_1_ctrl = REUT_MUX_LFSR,
++ .mux_2_ctrl = REUT_MUX_LFSR,
++ };
++ mchbar_write32(REUT_ch_PAT_CADB_MUX_CTRL(channel), cadb_mux_ctrl.raw);
++ const union reut_pat_cl_mux_lmn_reg cadb_cl_mux_lmn = {
++ .en_sweep_freq = 1,
++ .l_counter = 1,
++ .m_counter = 1,
++ .n_counter = 6,
++ };
++ mchbar_write32(REUT_ch_PAT_CADB_CL_MUX_LMN(channel), cadb_cl_mux_lmn.raw);
++ const uint16_t cadb_mux_seeds[NUM_CADB_MUX_SEEDS] = { 0x0ea1, 0xbeef, 0xdead };
++ set_cadb_patterns(channel, cadb_mux_seeds);
++}
++
++static uint32_t calc_rate(const uint32_t rate, const uint32_t lim, const uint8_t scale_bit)
++{
++ return rate > lim ? log2_ceil(rate - 1) : BIT(scale_bit) | rate;
++}
++
++void program_seq_addr(
++ const uint8_t channel,
++ const struct reut_box *reut_addr,
++ const bool log_seq_addr)
++{
++ const int loglevel = log_seq_addr ? BIOS_ERR : BIOS_NEVER;
++ const uint32_t div = 8;
++ union reut_seq_base_addr_reg reut_seq_addr_start = {
++ .col_addr = reut_addr->col.start / div,
++ .row_addr = reut_addr->row.start,
++ .bank_addr = reut_addr->bank.start,
++ .rank_addr = reut_addr->rank.start,
++ };
++ mchbar_write64(REUT_ch_SEQ_ADDR_START(channel), reut_seq_addr_start.raw);
++ reut_seq_addr_start.raw = mchbar_read64(REUT_ch_SEQ_ADDR_START(channel));
++ printk(loglevel, "\tStart column: %u\n", reut_seq_addr_start.col_addr);
++ printk(loglevel, "\tStart row: %u\n", reut_seq_addr_start.row_addr);
++ printk(loglevel, "\tStart bank: %u\n", reut_seq_addr_start.bank_addr);
++ printk(loglevel, "\tStart rank: %u\n", reut_seq_addr_start.rank_addr);
++ printk(loglevel, "\n");
++
++ union reut_seq_base_addr_reg reut_seq_addr_stop = {
++ .col_addr = reut_addr->col.stop / div,
++ .row_addr = reut_addr->row.stop,
++ .bank_addr = reut_addr->bank.stop,
++ .rank_addr = reut_addr->rank.stop,
++ };
++ mchbar_write64(REUT_ch_SEQ_ADDR_WRAP(channel), reut_seq_addr_stop.raw);
++ reut_seq_addr_stop.raw = mchbar_read64(REUT_ch_SEQ_ADDR_WRAP(channel));
++ printk(loglevel, "\tStop column: %u\n", reut_seq_addr_stop.col_addr);
++ printk(loglevel, "\tStop row: %u\n", reut_seq_addr_stop.row_addr);
++ printk(loglevel, "\tStop bank: %u\n", reut_seq_addr_stop.bank_addr);
++ printk(loglevel, "\tStop rank: %u\n", reut_seq_addr_stop.rank_addr);
++ printk(loglevel, "\n");
++
++ union reut_seq_misc_ctl_reg reut_seq_misc_ctl = {
++ .col_wrap_trigger_en = reut_addr->col.wrap_trigger,
++ .row_wrap_trigger_en = reut_addr->row.wrap_trigger,
++ .bank_wrap_trigger_en = reut_addr->bank.wrap_trigger,
++ .rank_wrap_trigger_en = reut_addr->rank.wrap_trigger,
++ };
++ mchbar_write32(REUT_ch_SEQ_MISC_CTL(channel), reut_seq_misc_ctl.raw);
++ printk(loglevel, "\tWrap column: %u\n", reut_addr->col.wrap_trigger);
++ printk(loglevel, "\tWrap row: %u\n", reut_addr->row.wrap_trigger);
++ printk(loglevel, "\tWrap bank: %u\n", reut_addr->bank.wrap_trigger);
++ printk(loglevel, "\tWrap rank: %u\n", reut_addr->rank.wrap_trigger);
++ printk(loglevel, "\n");
++
++ union reut_seq_addr_inc_ctl_reg reut_seq_addr_inc_ctl = {
++ .col_addr_update = calc_rate(reut_addr->col.inc_rate, 31, 7),
++ .row_addr_update = calc_rate(reut_addr->row.inc_rate, 15, 5),
++ .bank_addr_update = calc_rate(reut_addr->bank.inc_rate, 31, 7),
++ .rank_addr_update = calc_rate(reut_addr->rank.inc_rate, 31, 7),
++ .col_addr_increment = reut_addr->col.inc_val,
++ .row_addr_increment = reut_addr->row.inc_val,
++ .bank_addr_increment = reut_addr->bank.inc_val,
++ .rank_addr_increment = reut_addr->rank.inc_val,
++ };
++ printk(loglevel, "\tUpdRate column: %u\n", reut_addr->col.inc_rate);
++ printk(loglevel, "\tUpdRate row: %u\n", reut_addr->row.inc_rate);
++ printk(loglevel, "\tUpdRate bank: %u\n", reut_addr->bank.inc_rate);
++ printk(loglevel, "\tUpdRate rank: %u\n", reut_addr->rank.inc_rate);
++ printk(loglevel, "\n");
++ printk(loglevel, "\tUpdRateCR column: %u\n", reut_seq_addr_inc_ctl.col_addr_update);
++ printk(loglevel, "\tUpdRateCR row: %u\n", reut_seq_addr_inc_ctl.row_addr_update);
++ printk(loglevel, "\tUpdRateCR bank: %u\n", reut_seq_addr_inc_ctl.bank_addr_update);
++ printk(loglevel, "\tUpdRateCR rank: %u\n", reut_seq_addr_inc_ctl.rank_addr_update);
++ printk(loglevel, "\n");
++ printk(loglevel, "\tUpdInc column: %u\n", reut_seq_addr_inc_ctl.col_addr_increment);
++ printk(loglevel, "\tUpdInc row: %u\n", reut_seq_addr_inc_ctl.row_addr_increment);
++ printk(loglevel, "\tUpdInc bank: %u\n", reut_seq_addr_inc_ctl.bank_addr_increment);
++ printk(loglevel, "\tUpdInc rank: %u\n", reut_seq_addr_inc_ctl.rank_addr_increment);
++ printk(loglevel, "\n");
++ mchbar_write64(REUT_ch_SEQ_ADDR_INC_CTL(channel), reut_seq_addr_inc_ctl.raw);
++}
++
++/*
++ * Early steppings take exponential (base 2) loopcount values,
++ * but later steppings take linear loopcount values elsewhere.
++ * Address the differences in register offset and format here.
++ */
++void program_loop_count(const struct sysinfo *ctrl, const uint8_t channel, const uint8_t lc_exp)
++{
++ if (ctrl->stepping >= STEPPING_C0) {
++ const uint32_t loopcount = lc_exp >= 32 ? 0 : BIT(lc_exp);
++ mchbar_write32(HSW_REUT_ch_SEQ_LOOP_COUNT(channel), loopcount);
++ } else {
++ const uint8_t loopcount = lc_exp >= 32 ? 0 : lc_exp + 1;
++ union reut_seq_cfg_reg reut_seq_cfg = {
++ .raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)),
++ };
++ reut_seq_cfg.early_steppings_loop_count = loopcount;
++ mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ }
++}
++
++static inline void write_subseq(const uint8_t channel, const uint8_t idx, const uint32_t ssq)
++{
++ mchbar_write32(REUT_ch_SUBSEQ_x_CTL(channel, idx), ssq);
++}
++
++static void program_subseq(
++ struct sysinfo *const ctrl,
++ const uint8_t channel,
++ const enum reut_cmd_pat cmd_pat,
++ const uint32_t ss_a,
++ const uint32_t ss_b)
++{
++ switch (cmd_pat) {
++ case PAT_WR_RD_TA:
++ write_subseq(channel, 0, ss_a | SUBSEQ_B_WR);
++ for (uint8_t i = 1; i < 7; i++)
++ write_subseq(channel, i, ss_b | SUBSEQ_B_RD_WR);
++
++ write_subseq(channel, 7, ss_a | SUBSEQ_B_RD);
++ break;
++ case PAT_RD_WR_TA:
++ write_subseq(channel, 0, ss_b | SUBSEQ_B_WR_RD);
++ break;
++ case PAT_ODT_TA:
++ write_subseq(channel, 0, ss_a | SUBSEQ_B_WR);
++ write_subseq(channel, 1, ss_b | SUBSEQ_B_RD_WR);
++ write_subseq(channel, 2, ss_a | SUBSEQ_B_RD);
++ write_subseq(channel, 3, ss_b | SUBSEQ_B_WR_RD);
++ break;
++ default:
++ write_subseq(channel, 0, ss_a | SUBSEQ_B_WR);
++ write_subseq(channel, 1, ss_a | SUBSEQ_B_RD);
++ break;
++ }
++}
++
++/*
++ * Configure the REUT engine of each channel for an I/O test.
++ *
++ * ctrl:        raminit state, handed to the helpers called from here
++ * chanmask:    bitmask of channels to set up; must be non-zero. Channels not
++ *              in the mask only get global_control cleared in REUT_ch_SEQ_CFG
++ * cmd_pat:     command pattern; selects the subsequence start/end pointers
++ *              and how the subsequence registers are filled in
++ * num_cl:      number of cachelines; values up to 127 are programmed as a
++ *              linear number (BIT(7) set), larger ones as log2_ceil(num_cl)
++ * lc:          loop count exponent before accounting for num_cl
++ * reut_addr:   address box passed on to program_seq_addr()
++ * soe:         stop-on-error control written to REUT_ch_ERR_CONTROL
++ * pat:         WDB pattern (start/stop pointers, increment rate, DQ pattern)
++ * en_cadb:     non-zero enables CADB and programs it via setup_cadb()
++ * subseq_wait: wait value placed at bit 8 of each subsequence control word
++ *
++ * Dies if chanmask is zero or cmd_pat is not a known pattern. A ZQ short is
++ * issued on the selected channels once every channel has been programmed.
++ */
++void setup_io_test(
++	struct sysinfo *ctrl,
++	const uint8_t chanmask,
++	const enum reut_cmd_pat cmd_pat,
++	const uint16_t num_cl,
++	const uint8_t lc,
++	const struct reut_box *const reut_addr,
++	const enum test_stop soe,
++	const struct wdb_pat *const pat,
++	const uint8_t en_cadb,
++	const uint8_t subseq_wait)
++{
++	/* The format string has a single conversion: do not pass extra arguments */
++	if (!chanmask)
++		die("\n%s: invalid chanmask\n", __func__);
++
++	/*
++	 * Prepare variables needed for both channels.
++	 * The loop count exponent is forced to 0 (so that the programmed
++	 * loop count is 1) when we manually walk through subseq ODT and
++	 * TA Wr.
++	 */
++	uint8_t lc_exp = MAX(lc - log2_ceil(num_cl), 0);
++	if (cmd_pat == PAT_WR_RD_TA || cmd_pat == PAT_ODT_TA)
++		lc_exp = 0;
++
++	uint8_t num_clcr;
++	if (num_cl > 127) {
++		/* Assume exponential number */
++		num_clcr = log2_ceil(num_cl);
++	} else {
++		/* Set number of cache lines as linear number */
++		num_clcr = num_cl | BIT(7);
++	}
++
++	/* Same encoding, for twice the number of cachelines (used by subseq_b) */
++	const uint16_t num_cl2 = 2 * num_cl;
++	uint8_t num_cl2cr;
++	if (num_cl2 > 127) {
++		/* Assume exponential number */
++		num_cl2cr = log2_ceil(num_cl2);
++	} else {
++		/* Set number of cache lines as linear number */
++		num_cl2cr = num_cl2 | BIT(7);
++	}
++
++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++		if (!(chanmask & BIT(channel))) {
++			/* Channel not under test: only clear its global_control bit */
++			union reut_seq_cfg_reg reut_seq_cfg = {
++				.raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)),
++			};
++			reut_seq_cfg.global_control = 0;
++			mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++			continue;
++		}
++
++		/*
++		 * Program CADB
++		 */
++		mchbar_write8(REUT_ch_MISC_PAT_CADB_CTRL(channel), !!en_cadb);
++		if (en_cadb)
++			setup_cadb(ctrl, channel, 7, 8);
++
++		/*
++		 * Program sequence
++		 */
++		uint8_t subseq_start = 0;
++		uint8_t subseq_end = 0;
++		switch (cmd_pat) {
++		case PAT_WR_RD:
++			subseq_end = 1;
++			break;
++		case PAT_WR:
++			break;
++		case PAT_RD:
++			subseq_start = 1;
++			subseq_end = 1;
++			break;
++		case PAT_RD_WR_TA:
++			break;
++		case PAT_WR_RD_TA:
++			subseq_end = 7;
++			break;
++		case PAT_ODT_TA:
++			subseq_end = 3;
++			break;
++		default:
++			die("\n%s: Pattern type %u is invalid\n", __func__, cmd_pat);
++		}
++		const union reut_seq_cfg_reg reut_seq_cfg = {
++			.global_control = 1,
++			.initialization_mode = REUT_MODE_TEST,
++			.subsequence_start_pointer = subseq_start,
++			.subsequence_end_pointer = subseq_end,
++			.start_test_delay = 2,
++		};
++		mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++		program_loop_count(ctrl, channel, lc_exp);
++		mchbar_write32(REUT_ch_SEQ_CTL(channel), (union reut_seq_ctl_reg) {
++			.clear_errors = 1,
++		}.raw);
++
++		/*
++		 * Program subsequences
++		 */
++		uint32_t subseq_a = 0;
++
++		/* Number of cachelines and scale */
++		subseq_a |= (num_clcr & 0x00ff) << 0;
++		subseq_a |= (subseq_wait & 0x3fff) << 8;
++
++		/* Reset current base address to start */
++		subseq_a |= BIT(27);
++
++		uint32_t subseq_b = 0;
++
++		/* Number of cachelines and scale */
++		subseq_b |= (num_cl2cr & 0x00ff) << 0;
++		subseq_b |= (subseq_wait & 0x3fff) << 8;
++
++		/* Reset current base address to start */
++		subseq_b |= BIT(27);
++
++		program_subseq(ctrl, channel, cmd_pat, subseq_a, subseq_b);
++
++		/* Program sequence address */
++		program_seq_addr(channel, reut_addr, false);
++
++		/* Program WDB: rates below 32 are linear, others use log2_ceil() */
++		const bool is_linear = pat->inc_rate < 32;
++		mchbar_write32(REUT_ch_WDB_CL_CTRL(channel), (union reut_pat_wdb_cl_ctrl_reg) {
++			.start_ptr = pat->start_ptr,
++			.end_ptr   = pat->stop_ptr,
++			.inc_rate  = is_linear ? pat->inc_rate : log2_ceil(pat->inc_rate),
++			.inc_scale = is_linear,
++		}.raw);
++
++		/* Enable LMN in LMN or CADB modes, used to create lots of supply noise */
++		const bool use_lmn = pat->dq_pattern == LMN_VA || pat->dq_pattern == CADB;
++		union reut_pat_wdb_cl_mux_cfg_reg pat_wdb_cl_mux_cfg = {
++			.mux_0_control = use_lmn ? REUT_MUX_LMN : REUT_MUX_LFSR,
++			.mux_1_control = REUT_MUX_LFSR,
++			.mux_2_control = REUT_MUX_LFSR,
++			.ecc_data_source_sel = 1,
++		};
++
++		/* Program LFSR save/restore, too complex unless everything is power of 2 */
++		if (cmd_pat == PAT_ODT_TA || cmd_pat == PAT_WR_RD_TA) {
++			pat_wdb_cl_mux_cfg.reload_lfsr_seed_rate = log2_ceil(num_cl) + 1;
++			pat_wdb_cl_mux_cfg.save_lfsr_seed_rate = 1;
++		}
++		mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_CFG(channel), pat_wdb_cl_mux_cfg.raw);
++
++		/* Inversion mask is not used */
++		mchbar_write32(REUT_ch_PAT_WDB_INV(channel), 0);
++
++		/* Program error checking */
++		const union reut_err_ctl_reg reut_err_ctl = {
++			.selective_err_enable_cacheline = 0xff,
++			.selective_err_enable_chunk = 0xff,
++			.stop_on_error_control = soe,
++			.stop_on_nth_error = 1,
++		};
++		mchbar_write32(REUT_ch_ERR_CONTROL(channel), reut_err_ctl.raw);
++		mchbar_write64(REUT_ch_ERR_DATA_MASK(channel), 0);
++		mchbar_write8(REUT_ch_ERR_ECC_MASK(channel), 0);
++	}
++
++	/* Always do a ZQ short before the beginning of a test */
++	reut_issue_zq(ctrl, chanmask, ZQ_SHORT);
++}
++
++/*
++ * Set up a PAT_WR_RD I/O test of 128 cachelines with CADB enabled, then
++ * record the CADB pattern and the matching pattern loop count in `ctrl`.
++ */
++void setup_io_test_cadb(
++	struct sysinfo *ctrl,
++	const uint8_t chanmask,
++	const uint8_t lc,
++	const enum test_stop soe)
++{
++	/* Address box handed to setup_io_test() */
++	const struct reut_box addr_box = {
++		.rank = { .start = 0, .stop = 0,    .inc_rate = 32, .inc_val = 1,  },
++		.bank = { .start = 0, .stop = 7,    .inc_rate = 3,  .inc_val = 1,  },
++		.row  = { .start = 0, .stop = 2047, .inc_rate = 3,  .inc_val = 73, },
++		.col  = { .start = 0, .stop = 1023, .inc_rate = 0,  .inc_val = 53, },
++	};
++	/* WDB pointers 0..9 with an increment rate of 4 */
++	const struct wdb_pat wdb_pattern = {
++		.start_ptr  = 0,
++		.stop_ptr   = 9,
++		.inc_rate   = 4,
++		.dq_pattern = CADB,
++	};
++
++	setup_io_test(ctrl, chanmask, PAT_WR_RD, 128, lc, &addr_box, soe, &wdb_pattern, 1, 0);
++
++	ctrl->dq_pat = CADB;
++	ctrl->dq_pat_lc = MAX(lc - 5, 0) + 1;
++}
++
++/*
++ * Set up a PAT_WR_RD I/O test of 128 cachelines with CADB disabled, then
++ * record the BASIC_VA pattern and the matching pattern loop count in `ctrl`.
++ */
++void setup_io_test_basic_va(
++	struct sysinfo *ctrl,
++	const uint8_t chanmask,
++	const uint8_t lc,
++	const enum test_stop soe)
++{
++	/* Number of WDB entries covered by the pattern */
++	const uint32_t wdb_spread = 8;
++	/* Address box handed to setup_io_test(); bank and row are left zeroed */
++	const struct reut_box addr_box = {
++		.rank = { .start = 0, .stop = 0,    .inc_rate = 32, .inc_val = 1, },
++		.col  = { .start = 0, .stop = 1023, .inc_rate = 0,  .inc_val = 1, },
++	};
++	const struct wdb_pat wdb_pattern = {
++		.start_ptr  = 0,
++		.stop_ptr   = wdb_spread - 1,
++		.inc_rate   = 4,
++		.dq_pattern = BASIC_VA,
++	};
++
++	setup_io_test(ctrl, chanmask, PAT_WR_RD, 128, lc, &addr_box, soe, &wdb_pattern, 0, 0);
++
++	ctrl->dq_pat = BASIC_VA;
++	ctrl->dq_pat_lc = MAX(lc - 8, 0) + 1;
++}
++
++/*
++ * Set up a PAT_RD I/O test of 128 cachelines with CADB disabled. The address
++ * box depends on ctrl->lpddr. Afterwards, the BASIC_VA pattern with a pattern
++ * loop count of 1 is recorded in `ctrl`.
++ */
++void setup_io_test_mpr(
++	struct sysinfo *ctrl,
++	const uint8_t chanmask,
++	const uint8_t lc,
++	const enum test_stop soe)
++{
++	/* Address box used when ctrl->lpddr is not set */
++	const struct reut_box ddr_box = {
++		.rank = { .start = 0, .stop = 0,    .inc_rate = 32, .inc_val = 1, },
++		.col  = { .start = 0, .stop = 1023, .inc_rate = 0,  .inc_val = 1, },
++	};
++	/* Address box used when ctrl->lpddr is set: bank fixed to 4 */
++	const struct reut_box lpddr_box = {
++		.bank = { .start = 4, .stop = 4, .inc_rate = 0, .inc_val = 0, },
++	};
++	const struct wdb_pat wdb_pattern = {
++		.start_ptr  = 0,
++		.stop_ptr   = 9,
++		.inc_rate   = 4,
++		.dq_pattern = BASIC_VA,
++	};
++
++	const struct reut_box *addr_box = &ddr_box;
++	if (ctrl->lpddr)
++		addr_box = &lpddr_box;
++
++	setup_io_test(ctrl, chanmask, PAT_RD, 128, lc, addr_box, soe, &wdb_pattern, 0, 0);
++
++	ctrl->dq_pat = BASIC_VA;
++	ctrl->dq_pat_lc = 1;
++}
++
++/*
++ * Select which ranks of `channel` the REUT engine will test.
++ *
++ * `rankmask` is first restricted to the ranks in ctrl->rankmap[channel].
++ * The selected ranks are packed, in ascending order, into consecutive 4-bit
++ * fields of REUT_ch_RANK_LOG_TO_PHYS, and the rank field of the sequence
++ * wrap address is set to the number of selected ranks minus one.
++ *
++ * Returns BIT(channel) and sets global_control if at least one rank was
++ * selected; otherwise clears global_control and returns 0.
++ */
++uint8_t select_reut_ranks(struct sysinfo *ctrl, const uint8_t channel, uint8_t rankmask)
++{
++ rankmask &= ctrl->rankmap[channel];
++
++ uint8_t rank_count = 0;
++ uint32_t rank_log_to_phys = 0;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_mask(rank, rankmask))
++ continue;
++
++ /* Logical rank `rank_count` maps to physical rank `rank` */
++ rank_log_to_phys |= rank << (4 * rank_count);
++ rank_count++;
++ }
++ mchbar_write32(REUT_ch_RANK_LOG_TO_PHYS(channel), rank_log_to_phys);
++
++ union reut_seq_cfg_reg reut_seq_cfg = {
++ .raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)),
++ };
++ if (!rank_count) {
++ /* No rank to test on this channel: clear its global_control bit */
++ reut_seq_cfg.global_control = 0;
++ mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ return 0;
++ }
++ /* Set the rank field of the wrap address to the last logical rank */
++ union reut_seq_base_addr_reg reut_seq_addr_stop = {
++ .raw = mchbar_read64(REUT_ch_SEQ_ADDR_WRAP(channel)),
++ };
++ reut_seq_addr_stop.rank_addr = rank_count - 1;
++ mchbar_write64(REUT_ch_SEQ_ADDR_WRAP(channel), reut_seq_addr_stop.raw);
++
++ reut_seq_cfg.global_control = 1;
++ mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ return BIT(channel);
++}
++
++/*
++ * Start a REUT test through REUT_GLOBAL_CTL and stop it again shortly after.
++ * The sequence is: io_reset(), start the test (clearing errors if
++ * `clear_errors` is set), tick_delay(2), io_reset(), tick_delay(2), and
++ * finally write stop_test. Test completion is not polled here.
++ */
++void run_mpr_io_test(const bool clear_errors)
++{
++ io_reset();
++ mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) {
++ .start_test = 1,
++ .clear_errors = clear_errors,
++ }.raw);
++ tick_delay(2);
++ io_reset();
++ tick_delay(2);
++ mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) {
++ .stop_test = 1,
++ }.raw);
++}
++
++/* Number of test iterations run_io_test() performs for a given DQ pattern */
++static uint8_t get_num_tests(const uint8_t dq_pat)
++{
++	if (dq_pat == RD_RD_TA)
++		return 2;
++
++	if (dq_pat == SEGMENT_WDB || dq_pat == TURN_AROUND_ODT)
++		return 4;
++
++	if (dq_pat == CADB)
++		return 7;
++
++	if (dq_pat == TURN_AROUND_WR || dq_pat == RD_RD_TA_ALL)
++		return 8;
++
++	/* Every other pattern is run exactly once */
++	return 1;
++}
++
++/*
++ * Run the REUT test(s) for DQ pattern `dq_pat` on the channels in `chanmask`.
++ *
++ * The number of iterations comes from get_num_tests(). Before each iteration,
++ * every selected channel gets its pattern-specific settings reprogrammed
++ * (WDB pointers and loop count, CADB, subsequence pointers and address
++ * increment rates, or tRDRD turnaround timings). The test is then started via
++ * REUT_GLOBAL_CTL and polled until all selected channels report completion,
++ * or until a one-second timeout expires, in which case the test is stopped
++ * and an error is logged.
++ *
++ * Errors are only cleared on the first iteration, and only if `clear_errors`
++ * is set. If any selected channel has stop-on-error enabled, iterating stops
++ * as soon as a selected channel reports an error. For the RD_RD_TA patterns,
++ * TC_BANK_RANK_A is restored from `ctrl` once all iterations are done.
++ *
++ * Returns the ch_error field of REUT_GLOBAL_ERR, read after the last iteration.
++ */
++uint8_t run_io_test(
++	struct sysinfo *const ctrl,
++	const uint8_t chanmask,
++	const uint8_t dq_pat,
++	const bool clear_errors)
++{
++	/* SEGMENT_WDB only runs 4 tests */
++	const uint8_t segment_wdb_lc[4] = { 0, 0, 4, 2 };
++	const union reut_pat_wdb_cl_ctrl_reg pat_wdb_cl[4] = {
++		[0] = {
++			.start_ptr = 0,
++			.end_ptr   = 9,
++			.inc_rate  = 25,
++			.inc_scale = SCALE_LINEAR,
++		},
++		[1] = {
++			.start_ptr = 0,
++			.end_ptr   = 9,
++			.inc_rate  = 25,
++			.inc_scale = SCALE_LINEAR,
++		},
++		[2] = {
++			.start_ptr = 10,
++			.end_ptr   = 63,
++			.inc_rate  = 19,
++			.inc_scale = SCALE_LINEAR,
++		},
++		[3] = {
++			.start_ptr = 10,
++			.end_ptr   = 63,
++			.inc_rate  = 10,
++			.inc_scale = SCALE_LINEAR,
++		},
++	};
++	const bool is_turnaround = dq_pat == RD_RD_TA || dq_pat == RD_RD_TA_ALL;
++	const uint8_t num_tests = get_num_tests(dq_pat);
++
++	/* Working copy of the turnaround timings, modified on each iteration */
++	union tc_bank_rank_a_reg tc_bank_rank_a[NUM_CHANNELS] = { 0 };
++	if (is_turnaround) {
++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++			if (!(chanmask & BIT(channel)))
++				continue;
++
++			tc_bank_rank_a[channel].raw = ctrl->tc_bankrank_a[channel].raw;
++		}
++	}
++	for (uint8_t t = 0; t < num_tests; t++) {
++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++			if (!(chanmask & BIT(channel)))
++				continue;
++
++			if (dq_pat == SEGMENT_WDB) {
++				mchbar_write32(REUT_ch_WDB_CL_CTRL(channel), pat_wdb_cl[t].raw);
++				/*
++				 * Skip programming LFSR save/restore. Too complex
++				 * unless power of 2. Program desired loopcount.
++				 */
++				const uint8_t pat_lc = ctrl->dq_pat_lc + segment_wdb_lc[t];
++				program_loop_count(ctrl, channel, pat_lc);
++			} else if (dq_pat == CADB) {
++				setup_cadb(ctrl, channel, num_tests, t);
++			} else if (dq_pat == TURN_AROUND_WR || dq_pat == TURN_AROUND_ODT) {
++				/* Run a single subsequence per iteration */
++				union reut_seq_cfg_reg reut_seq_cfg = {
++					.raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)),
++				};
++				reut_seq_cfg.subsequence_start_pointer = t;
++				reut_seq_cfg.subsequence_end_pointer = t;
++				mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++				union reut_seq_addr_inc_ctl_reg addr_inc_ctl = {
++					.raw = mchbar_read64(REUT_ch_SEQ_ADDR_INC_CTL(channel)),
++				};
++				uint8_t ta_inc_rate = 1;
++				if (dq_pat == TURN_AROUND_WR && (t == 0 || t == 7))
++					ta_inc_rate = 0;
++				else if (dq_pat == TURN_AROUND_ODT && (t == 0 || t == 2))
++					ta_inc_rate = 0;
++
++				/* Program increment rate as linear value */
++				addr_inc_ctl.rank_addr_update = BIT(7) | ta_inc_rate;
++				addr_inc_ctl.col_addr_update = BIT(7) | ta_inc_rate;
++				mchbar_write64(REUT_ch_SEQ_ADDR_INC_CTL(channel),
++						addr_inc_ctl.raw);
++			} else if (dq_pat == RD_RD_TA) {
++				tc_bank_rank_a[channel].tRDRD_sr = (t == 0) ? 4 : 5;
++				mchbar_write32(TC_BANK_RANK_A_ch(channel),
++						tc_bank_rank_a[channel].raw);
++			} else if (dq_pat == RD_RD_TA_ALL) {
++				/*
++				 * Program tRDRD for SR and DR. Run 8 tests, covering
++				 * tRDRD_sr = 4, 5, 6, 7 and tRDRD_dr = min, +1, +2, +3
++				 */
++				const uint32_t tRDRD_dr = ctrl->tc_bankrank_a[channel].tRDRD_dr;
++				tc_bank_rank_a[channel].tRDRD_sr = (t % 4) + 4;
++				tc_bank_rank_a[channel].tRDRD_dr = (t % 4) + tRDRD_dr;
++				mchbar_write32(TC_BANK_RANK_A_ch(channel),
++						tc_bank_rank_a[channel].raw);
++
++				/* Program linear rank increment rate */
++				union reut_seq_addr_inc_ctl_reg addr_inc_ctl = {
++					.raw = mchbar_read64(REUT_ch_SEQ_ADDR_INC_CTL(channel)),
++				};
++				/*
++				 * The conditional must be parenthesized: `|` binds
++				 * tighter than `?:`, so without the parentheses the
++				 * whole expression would always evaluate to 0.
++				 */
++				addr_inc_ctl.rank_addr_update = BIT(7) | ((t / 4) ? 0 : 31);
++				mchbar_write64(REUT_ch_SEQ_ADDR_INC_CTL(channel),
++						addr_inc_ctl.raw);
++			}
++		}
++
++		/* Check whether any selected channel has stop-on-error enabled */
++		bool test_soe = false;
++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++			if (!(chanmask & BIT(channel)))
++				continue;
++
++			const union reut_err_ctl_reg reut_err_ctl = {
++				.raw = mchbar_read32(REUT_ch_ERR_CONTROL(channel)),
++			};
++			const uint8_t soe = reut_err_ctl.stop_on_error_control;
++			if (soe != NSOE) {
++				test_soe = true;
++				break;
++			}
++		}
++		io_reset();
++		mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) {
++			.start_test   = 1,
++			.clear_errors = clear_errors && t == 0,
++		}.raw);
++
++		/* Poll for completion, giving up after one second */
++		struct mono_time prev, curr;
++		timer_monotonic_get(&prev);
++		union reut_global_err_reg global_err;
++		do {
++			global_err.raw = mchbar_read32(REUT_GLOBAL_ERR);
++			/** TODO: Clean up this mess **/
++			timer_monotonic_get(&curr);
++			if (mono_time_diff_microseconds(&prev, &curr) > 1000 * 1000) {
++				mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) {
++					.stop_test = 1,
++				}.raw);
++				printk(BIOS_ERR, "REUT timed out, ch_done: %x\n",
++					global_err.ch_test_done);
++				break;
++			}
++		} while ((global_err.ch_test_done & chanmask) != chanmask);
++
++		/* With stop-on-error, do not run the remaining iterations */
++		if (test_soe && (global_err.ch_error & chanmask))
++			break;
++	}
++	if (is_turnaround) {
++		/* Restore the original turnaround timings */
++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++			if (!(chanmask & BIT(channel)))
++				continue;
++
++			mchbar_write32(TC_BANK_RANK_A_ch(channel),
++					ctrl->tc_bankrank_a[channel].raw);
++		}
++	}
++	return ((union reut_global_err_reg)mchbar_read32(REUT_GLOBAL_ERR)).ch_error;
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index f8408e51a0..817a9f8bf8 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -94,20 +94,35 @@
+ #define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
+ #define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+
++#define REUT_ch_PAT_WDB_CL_MUX_CFG(ch) _MCMAIN_C(0x4040, ch)
++
+ #define REUT_ch_PAT_WDB_CL_MUX_WR_x(ch, x) _MCMAIN_C_X(0x4048, ch, x) /* x in 0 .. 2 */
+ #define REUT_ch_PAT_WDB_CL_MUX_RD_x(ch, x) _MCMAIN_C_X(0x4054, ch, x) /* x in 0 .. 2 */
+
+ #define REUT_ch_PAT_WDB_CL_MUX_LMN(ch) _MCMAIN_C(0x4078, ch)
+
++#define REUT_ch_PAT_WDB_INV(ch) _MCMAIN_C(0x4084, ch)
++
++#define REUT_ch_ERR_CONTROL(ch) _MCMAIN_C(0x4098, ch)
++#define REUT_ch_ERR_ECC_MASK(ch) _MCMAIN_C(0x409c, ch)
++
+ #define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
+
++#define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
++
+ #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
+
++#define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)
+ #define REUT_ch_PAT_CADB_MRS(ch) _MCMAIN_C(0x419c, ch)
++#define REUT_ch_PAT_CADB_MUX_CTRL(ch) _MCMAIN_C(0x41a0, ch)
++#define REUT_ch_PAT_CADB_MUX_x(ch, x) _MCMAIN_C_X(0x41a4, ch, x) /* x in 0 .. 2 */
+
++#define REUT_ch_PAT_CADB_CL_MUX_LMN(ch) _MCMAIN_C(0x41b0, ch)
+ #define REUT_ch_PAT_CADB_WRITE_PTR(ch) _MCMAIN_C(0x41bc, ch)
+ #define REUT_ch_PAT_CADB_PROG(ch) _MCMAIN_C(0x41c0, ch)
+
++#define REUT_ch_WDB_CL_CTRL(ch) _MCMAIN_C(0x4200, ch)
++
+ #define TC_ZQCAL_ch(ch) _MCMAIN_C(0x4290, ch)
+ #define TC_RFP_ch(ch) _MCMAIN_C(0x4294, ch)
+ #define TC_RFTP_ch(ch) _MCMAIN_C(0x4298, ch)
+@@ -119,12 +134,27 @@
+ #define QCLK_ch_LDAT_SDAT(ch) _MCMAIN_C(0x42d4, ch)
+ #define QCLK_ch_LDAT_DATA_IN_x(ch, x) _MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */
+
++#define REUT_GLOBAL_CTL 0x4800
+ #define REUT_GLOBAL_ERR 0x4804
+
++#define REUT_ch_SUBSEQ_x_CTL(ch, x) (0x4808 + 40 * (ch) + 4 * (x))
++
+ #define REUT_ch_SEQ_CFG(ch) (0x48a8 + 8 * (ch))
+
+ #define REUT_ch_SEQ_CTL(ch) (0x48b8 + 4 * (ch))
+
++#define REUT_ch_SEQ_ADDR_START(ch) (0x48d8 + 8 * (ch))
++
++#define REUT_ch_SEQ_ADDR_WRAP(ch) (0x48e8 + 8 * (ch))
++
++#define REUT_ch_SEQ_MISC_CTL(ch) (0x4908 + 4 * (ch))
++
++#define REUT_ch_SEQ_ADDR_INC_CTL(ch) (0x4910 + 8 * (ch))
++
++#define REUT_ch_RANK_LOG_TO_PHYS(ch) (0x4930 + 4 * (ch)) /* 4 bits per rank */
++
++#define HSW_REUT_ch_SEQ_LOOP_COUNT(ch) (0x4980 + 4 * (ch)) /* *** only on C0 *** */
++
+ /* MCMAIN broadcast */
+ #define MCSCHEDS_CBIT 0x4c20
+
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch b/resources/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch
new file mode 100644
index 00000000..f433b043
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch
@@ -0,0 +1,222 @@
+From 9fba0468e75877cbda62f5eaeef1946d6489a8f9 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 00:56:00 +0200
+Subject: [PATCH 19/26] haswell NRI: Add range tracking library
+
+Implement a small library used to keep track of passing ranges. This
+will be used by 1D training algorithms when margining some parameter.
+
+Change-Id: I8718e85165160afd7c0c8e730b5ce6c9c00f8a60
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../intel/haswell/native_raminit/ranges.c | 109 ++++++++++++++++++
+ .../intel/haswell/native_raminit/ranges.h | 68 +++++++++++
+ 3 files changed, 178 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/ranges.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/ranges.h
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 6e1b365602..2da950771d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -9,6 +9,7 @@ romstage-y += io_comp_control.c
+ romstage-y += memory_map.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
++romstage-y += ranges.c
+ romstage-y += reut.c
+ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/ranges.c b/src/northbridge/intel/haswell/native_raminit/ranges.c
+new file mode 100644
+index 0000000000..cdebc1fa66
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/ranges.c
+@@ -0,0 +1,109 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <types.h>
++
++#include "ranges.h"
++
++/*
++ * Record the pass/fail result for `value` while sweeping a linear parameter
++ * from `start` in increments of `step`, keeping track of the current and the
++ * largest passing range.
++ */
++void linear_record_pass(
++	struct linear_train_data *const data,
++	const bool pass,
++	const int32_t value,
++	const int32_t start,
++	const int32_t step)
++{
++	if (value == start) {
++		/*
++		 * First sample: (re)initialize everything. A passing value seeds
++		 * a zero-length range at `start`, which later passes can extend.
++		 * A failing value seeds a zero-length range one step before
++		 * `start`, which no later pass is contiguous with.
++		 */
++		const int32_t seed = pass ? start : start - step;
++		data->current.start = seed;
++		data->current.end = seed;
++		data->largest = data->current;
++		return;
++	}
++
++	/* Failing values after the first sample leave the data untouched */
++	if (!pass)
++		return;
++
++	/* A pass that does not directly follow the current range starts a new one */
++	const bool contiguous = data->current.end == (value - step);
++	if (!contiguous)
++		data->current.start = value;
++
++	data->current.end = value;
++
++	/* Keep the widest range seen so far */
++	if (range_width(data->largest) < range_width(data->current))
++		data->largest = data->current;
++}
++
++/*
++ * Record the pass/fail result for `value` while sweeping a phase parameter
++ * from `start` in increments of `step`. Besides the current and the largest
++ * passing range, the range that begins at the first sample is tracked as the
++ * initial range, so that it can later be joined with the final range.
++ */
++void phase_record_pass(
++	struct phase_train_data *const data,
++	const bool pass,
++	const int32_t value,
++	const int32_t start,
++	const int32_t step)
++{
++	if (value != start) {
++		if (pass) {
++			const int32_t previous = value - step;
++
++			/* Grow the initial range while passes stay contiguous with it */
++			if (data->initial.end == previous)
++				data->initial.end = value;
++
++			/* A non-contiguous pass begins a new current range */
++			if (data->current.end != previous)
++				data->current.start = value;
++
++			data->current.end = value;
++
++			/* Keep the widest range seen so far */
++			if (range_width(data->largest) < range_width(data->current))
++				data->largest = data->current;
++		}
++		return;
++	}
++
++	/*
++	 * First sample: (re)initialize everything. A passing value seeds a
++	 * zero-length range at `start`, which later passes can extend. A
++	 * failing value seeds a zero-length range one step before `start`,
++	 * which no later pass is contiguous with.
++	 */
++	const int32_t seed = pass ? start : start - step;
++	data->current.start = seed;
++	data->current.end = seed;
++	data->largest = data->current;
++	data->initial = data->current;
++}
++
++/*
++ * Join the initial range onto the end of the current range: when the initial
++ * range begins at `start` and does not end where the current range ends, the
++ * end of the current range is moved forward by one step plus the width of the
++ * initial range. The largest range is updated afterwards.
++ */
++void phase_append_initial_to_current(
++	struct phase_train_data *const data,
++	const int32_t start,
++	const int32_t step)
++{
++	const bool initial_is_valid = data->initial.start == start;
++	const bool ranges_differ = data->initial.end != data->current.end;
++
++	if (initial_is_valid && ranges_differ)
++		data->current.end += step + range_width(data->initial);
++
++	if (range_width(data->largest) < range_width(data->current))
++		data->largest = data->current;
++}
++
++/*
++ * Join the current range onto the beginning of the initial range: when the
++ * initial range begins at `start` and does not end where the current range
++ * ends, the start of the initial range is moved back by one step plus the
++ * width of the current range, and the result becomes the current range.
++ * The largest range is updated afterwards.
++ */
++void phase_append_current_to_initial(
++	struct phase_train_data *const data,
++	const int32_t start,
++	const int32_t step)
++{
++	const bool initial_is_valid = data->initial.start == start;
++	const bool ranges_differ = data->initial.end != data->current.end;
++
++	if (initial_is_valid && ranges_differ) {
++		const int32_t extension = step + range_width(data->current);
++
++		data->initial.start -= extension;
++		data->current = data->initial;
++	}
++
++	if (range_width(data->largest) < range_width(data->current))
++		data->largest = data->current;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/ranges.h b/src/northbridge/intel/haswell/native_raminit/ranges.h
+new file mode 100644
+index 0000000000..235392df96
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/ranges.h
+@@ -0,0 +1,68 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef HASWELL_RAMINIT_RANGES_H
++#define HASWELL_RAMINIT_RANGES_H
++
++#include <types.h>
++
++/*
++ * Many algorithms shmoo some parameter to determine the largest passing
++ * range. Provide a common implementation to avoid redundant boilerplate.
++ */
++struct passing_range {
++ int32_t start; /* First value of the range */
++ int32_t end; /* Last value of the range; equals start for a zero-length range */
++};
++
++/* Structure for linear parameters, such as roundtrip delays */
++struct linear_train_data {
++ struct passing_range current; /* Range the most recent passing value belongs to */
++ struct passing_range largest; /* Widest range recorded so far */
++};
++
++/*
++ * Phase ranges are "circular": the first and last indices are contiguous.
++ * To correctly determine the largest passing range, one has to combine
++ * the initial range and the current range when processing the last index.
++ */
++struct phase_train_data {
++ struct passing_range initial; /* Range seeded by the first sample of the sweep */
++ struct passing_range current; /* Range the most recent passing value belongs to */
++ struct passing_range largest; /* Widest range recorded so far */
++};
++
++/* Distance between the two ends of a range; zero when start equals end */
++static inline int32_t range_width(const struct passing_range range)
++{
++	const int32_t width = range.end - range.start;
++
++	return width;
++}
++
++/* Value halfway between the two ends of a range (the half-width is truncated) */
++static inline int32_t range_center(const struct passing_range range)
++{
++	const int32_t half_width = range_width(range) / 2;
++
++	return range.start + half_width;
++}
++
++void linear_record_pass(
++ struct linear_train_data *data,
++ bool pass,
++ int32_t value,
++ int32_t start,
++ int32_t step);
++
++void phase_record_pass(
++ struct phase_train_data *data,
++ bool pass,
++ int32_t value,
++ int32_t start,
++ int32_t step);
++
++void phase_append_initial_to_current(
++ struct phase_train_data *data,
++ int32_t start,
++ int32_t step);
++
++void phase_append_current_to_initial(
++ struct phase_train_data *data,
++ int32_t start,
++ int32_t step);
++
++#endif
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch b/resources/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch
new file mode 100644
index 00000000..30926494
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch
@@ -0,0 +1,294 @@
+From 54cfbe4cf53d16f747bfcfadd20445a0f5f1e5db Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 01:11:03 +0200
+Subject: [PATCH 20/26] haswell NRI: Add library to change margins
+
+Implement a library to change Rx/Tx margins. It will be expanded later.
+
+Change-Id: I0b55aba428d8b4d4e16d2fbdec57235ce3ce8adf
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/change_margin.c | 154 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_native.h | 50 ++++++
+ .../intel/haswell/registers/mchbar.h | 9 +
+ 4 files changed, 214 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/change_margin.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 2da950771d..ebe9e9b762 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,5 +1,6 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += change_margin.c
+ romstage-y += configure_mc.c
+ romstage-y += ddr3.c
+ romstage-y += jedec_reset.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/change_margin.c b/src/northbridge/intel/haswell/native_raminit/change_margin.c
+new file mode 100644
+index 0000000000..12da59580f
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/change_margin.c
+@@ -0,0 +1,154 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++
++#include "raminit_native.h"
++
++/*
++ * Program the Rx training register of one channel/rank/byte.
++ *
++ * The register value is built from the settings stored in `ctrl`, with the
++ * field selected by `subfield` replaced by `value` (clamped to the limits
++ * used below). RXT_RESTORE writes the stored settings unmodified. The stored
++ * settings in `ctrl` are not updated by this function.
++ *
++ * An error is logged when `value` had to be clamped. Dies on an unknown
++ * subfield. After the register write, download_regfile() is called with
++ * read_rf_rd set.
++ */
++void update_rxt(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rank,
++ const uint8_t byte,
++ const enum rxt_subfield subfield,
++ const int32_t value)
++{
++ /* Start from the settings currently stored for this byte lane */
++ union ddr_data_rx_train_rank_reg rxt = {
++ .rcven = ctrl->rcven[channel][rank][byte],
++ .dqs_p = ctrl->rxdqsp[channel][rank][byte],
++ .rx_eq = ctrl->rx_eq[channel][rank][byte],
++ .dqs_n = ctrl->rxdqsn[channel][rank][byte],
++ .vref = ctrl->rxvref[channel][rank][byte],
++ };
++ int32_t new_value;
++ switch (subfield) {
++ case RXT_RCVEN:
++ new_value = clamp_s32(0, value, 511);
++ rxt.rcven = new_value;
++ break;
++ case RXT_RXDQS_P:
++ new_value = clamp_s32(0, value, 63);
++ rxt.dqs_p = new_value;
++ break;
++ case RXT_RX_EQ:
++ new_value = clamp_s32(0, value, 31);
++ rxt.rx_eq = new_value;
++ break;
++ case RXT_RXDQS_N:
++ new_value = clamp_s32(0, value, 63);
++ rxt.dqs_n = new_value;
++ break;
++ case RXT_RX_VREF:
++ new_value = clamp_s32(-32, value, 31);
++ rxt.vref = new_value;
++ break;
++ case RXT_RXDQS_BOTH:
++ new_value = clamp_s32(0, value, 63);
++ rxt.dqs_p = new_value;
++ rxt.dqs_n = new_value;
++ break;
++ case RXT_RESTORE:
++ /* Nothing to override; `value` is not used */
++ new_value = value;
++ break;
++ default:
++ die("%s: Unhandled subfield index %u\n", __func__, subfield);
++ }
++
++ /* A mismatch means the requested value was out of range and got clamped */
++ if (new_value != value) {
++ printk(BIOS_ERR, "%s: Overflow for subfield %u: %d ---> %d\n",
++ __func__, subfield, value, new_value);
++ }
++ mchbar_write32(RX_TRAIN_ch_r_b(channel, rank, byte), rxt.raw);
++ download_regfile(ctrl, channel, false, rank, REG_FILE_USE_RANK, byte, true, false);
++}
++
++/*
++ * Program the Tx training register of one channel/rank/byte.
++ *
++ * The register value is built from the settings stored in `ctrl`, with the
++ * field selected by `subfield` replaced by `value` (clamped to the limits
++ * used below). TXT_DQDQS_OFF instead adds `value` to both the DQ and DQS
++ * delays, without clamping. TXT_RESTORE writes the stored settings
++ * unmodified. The stored settings in `ctrl` are not updated by this function.
++ *
++ * An error is logged when `value` had to be clamped. Dies on an unknown
++ * subfield. After the register write, download_regfile() is called with
++ * read_rf_wr set.
++ */
++void update_txt(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rank,
++ const uint8_t byte,
++ const enum txt_subfield subfield,
++ const int32_t value)
++{
++ /* Start from the settings currently stored for this byte lane */
++ union ddr_data_tx_train_rank_reg txt = {
++ .dq_delay = ctrl->tx_dq[channel][rank][byte],
++ .dqs_delay = ctrl->txdqs[channel][rank][byte],
++ .tx_eq = ctrl->tx_eq[channel][rank][byte],
++ };
++ int32_t new_value;
++ switch (subfield) {
++ case TXT_TX_DQ:
++ new_value = clamp_s32(0, value, 511);
++ txt.dq_delay = new_value;
++ break;
++ case TXT_TXDQS:
++ new_value = clamp_s32(0, value, 511);
++ txt.dqs_delay = new_value;
++ break;
++ case TXT_TX_EQ:
++ new_value = clamp_s32(0, value, 63);
++ txt.tx_eq = new_value;
++ break;
++ case TXT_DQDQS_OFF:
++ /* `value` is an offset applied to both delays; it is not range-checked */
++ new_value = value;
++ txt.dqs_delay += new_value;
++ txt.dq_delay += new_value;
++ break;
++ case TXT_RESTORE:
++ /* Nothing to override; `value` is not used */
++ new_value = value;
++ break;
++ default:
++ die("%s: Unhandled subfield index %u\n", __func__, subfield);
++ }
++ /* A mismatch means the requested value was out of range and got clamped */
++ if (new_value != value) {
++ printk(BIOS_ERR, "%s: Overflow for subfield %u: %d ---> %d\n",
++ __func__, subfield, value, new_value);
++ }
++ mchbar_write32(TX_TRAIN_ch_r_b(channel, rank, byte), txt.raw);
++ download_regfile(ctrl, channel, false, rank, REG_FILE_USE_RANK, byte, false, true);
++}
++
++/*
++ * Set the read_rf_rd, read_rf_wr and read_rf_rank fields of DDR data
++ * control register 0.
++ *
++ * channel:    channel whose register is written
++ * multicast:  write the per-channel multicast register instead of the
++ *             register of a single byte lane
++ * rank:       rank to use; must be below NUM_SLOTRANKS in REG_FILE_USE_RANK mode
++ * regfile:    how the rank is determined (see enum regfile_mode)
++ * byte:       byte lane to write when `multicast` is false
++ * read_rf_rd: value for the read_rf_rd field
++ * read_rf_wr: value for the read_rf_wr field
++ *
++ * In the START and CURRENT modes, the REUT start or current sequence address
++ * is read; if it is non-zero, its logical rank is translated to a physical
++ * rank using REUT_ch_RANK_LOG_TO_PHYS. Otherwise `rank` is used as-is.
++ * Dies on an invalid mode. `ctrl` is not used.
++ */
++void download_regfile(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const bool multicast,
++ const uint8_t rank,
++ const enum regfile_mode regfile,
++ const uint8_t byte,
++ const bool read_rf_rd,
++ const bool read_rf_wr)
++{
++ union reut_seq_base_addr_reg reut_seq_base_addr;
++ switch (regfile) {
++ case REG_FILE_USE_START:
++ reut_seq_base_addr.raw = mchbar_read64(REUT_ch_SEQ_ADDR_START(channel));
++ break;
++ case REG_FILE_USE_CURRENT:
++ reut_seq_base_addr.raw = mchbar_read64(REUT_ch_SEQ_ADDR_CURRENT(channel));
++ break;
++ case REG_FILE_USE_RANK:
++ /* A zero address skips the logical-to-physical translation below */
++ reut_seq_base_addr.raw = 0;
++ if (rank >= NUM_SLOTRANKS)
++ die("%s: bad rank %u\n", __func__, rank);
++ break;
++ default:
++ die("%s: Invalid regfile param %u\n", __func__, regfile);
++ }
++ uint8_t phys_rank = rank;
++ if (reut_seq_base_addr.raw != 0) {
++ /* Map REUT logical rank to physical rank */
++ const uint32_t log_to_phys = mchbar_read32(REUT_ch_RANK_LOG_TO_PHYS(channel));
++ phys_rank = log_to_phys >> (reut_seq_base_addr.rank_addr * 4) & 0x3;
++ }
++ uint32_t reg = multicast ? DDR_DATA_ch_CONTROL_0(channel) : DQ_CONTROL_0(channel, byte);
++ union ddr_data_control_0_reg ddr_data_control_0 = {
++ .raw = mchbar_read32(reg),
++ };
++ ddr_data_control_0.read_rf_rd = read_rf_rd;
++ ddr_data_control_0.read_rf_wr = read_rf_wr;
++ ddr_data_control_0.read_rf_rank = phys_rank;
++ mchbar_write32(reg, ddr_data_control_0.raw);
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 56df36ca8d..7c1a786780 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -117,6 +117,30 @@ enum test_stop {
+ ALSOE = 3, /* Stop on all lanes error */
+ };
+
++/* Selects which Rx training field update_rxt() overrides */
++enum rxt_subfield {
++ RXT_RCVEN = 0, /* rcven field, clamped to 0..511 */
++ RXT_RXDQS_P = 1, /* dqs_p field, clamped to 0..63 */
++ RXT_RX_EQ = 2, /* rx_eq field, clamped to 0..31 */
++ RXT_RXDQS_N = 3, /* dqs_n field, clamped to 0..63 */
++ RXT_RX_VREF = 4, /* vref field, clamped to -32..31 */
++ RXT_RXDQS_BOTH = 5, /* dqs_p and dqs_n fields, clamped to 0..63 */
++ RXT_RESTORE = 255, /* No override: write the stored settings */
++};
++
++/* Selects which Tx training field update_txt() overrides */
++enum txt_subfield {
++ TXT_TX_DQ = 0, /* dq_delay field, clamped to 0..511 */
++ TXT_TXDQS = 1, /* dqs_delay field, clamped to 0..511 */
++ TXT_TX_EQ = 2, /* tx_eq field, clamped to 0..63 */
++ TXT_DQDQS_OFF = 3, /* Offset added to both dq_delay and dqs_delay, not clamped */
++ TXT_RESTORE = 255, /* No override: write the stored settings */
++};
++
++/* Tells download_regfile() how to determine the rank to use */
++enum regfile_mode {
++ REG_FILE_USE_RANK, /* Used when changing parameters for each rank */
++ REG_FILE_USE_START, /* Used when changing parameters before the test */
++ REG_FILE_USE_CURRENT, /* Used when changing parameters after the test */
++};
++
+ struct wdb_pat {
+ uint32_t start_ptr; /* Starting pointer in WDB */
+ uint32_t stop_ptr; /* Stopping pointer in WDB */
+@@ -452,6 +476,32 @@ uint8_t select_reut_ranks(struct sysinfo *ctrl, uint8_t channel, uint8_t rankmas
+ void run_mpr_io_test(bool clear_errors);
+ uint8_t run_io_test(struct sysinfo *ctrl, uint8_t chanmask, uint8_t dq_pat, bool clear_errors);
+
++void update_rxt(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ uint8_t rank,
++ uint8_t byte,
++ enum rxt_subfield subfield,
++ int32_t value);
++
++void update_txt(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ uint8_t rank,
++ uint8_t byte,
++ enum txt_subfield subfield,
++ int32_t value);
++
++void download_regfile(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ bool multicast,
++ uint8_t rank,
++ enum regfile_mode regfile,
++ uint8_t byte,
++ bool read_rf_rd,
++ bool read_rf_wr);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 817a9f8bf8..a81559bb1e 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -15,7 +15,11 @@
+ /* Register definitions */
+
+ /* DDR DATA per-channel per-bytelane */
++#define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
++#define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
++
+ #define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
++#define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
+
+ /* DDR CKE per-channel */
+ #define DDR_CKE_ch_CMD_COMP_OFFSET(ch) _DDRIO_C_R_B(0x1204, ch, 0, 0)
+@@ -38,6 +42,9 @@
+ #define DDR_SCRAMBLE_ch(ch) (0x2000 + 4 * (ch))
+ #define DDR_SCRAM_MISC_CONTROL 0x2008
+
++/* DDR DATA per-channel multicast */
++#define DDR_DATA_ch_CONTROL_0(ch) _DDRIO_C_R_B(0x3074, ch, 0, 0)
++
+ /* DDR CMDN/CMDS per-channel (writes go to both CMDN and CMDS fubs) */
+ #define DDR_CMD_ch_COMP_OFFSET(ch) _DDRIO_C_R_B(0x3204, ch, 0, 0)
+ #define DDR_CMD_ch_PI_CODING(ch) _DDRIO_C_R_B(0x3208, ch, 0, 0)
+@@ -147,6 +154,8 @@
+
+ #define REUT_ch_SEQ_ADDR_WRAP(ch) (0x48e8 + 8 * (ch))
+
++#define REUT_ch_SEQ_ADDR_CURRENT(ch) (0x48f8 + 8 * (ch))
++
+ #define REUT_ch_SEQ_MISC_CTL(ch) (0x4908 + 4 * (ch))
+
+ #define REUT_ch_SEQ_ADDR_INC_CTL(ch) (0x4910 + 8 * (ch))
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch b/resources/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch
new file mode 100644
index 00000000..9139a67e
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch
@@ -0,0 +1,708 @@
+From ac8843553af34855d0331554c03280e66c4ea582 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 00:05:41 +0200
+Subject: [PATCH 21/26] haswell NRI: Add RcvEn training
+
+Implement the RcvEn (Receive Enable) calibration procedure.
+
+Change-Id: Ifbfa520f3e0486c56d0988ce67af2ddb9cf29888
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 14 +
+ .../haswell/native_raminit/reg_structs.h | 13 +
+ .../native_raminit/train_receive_enable.c | 561 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 3 +
+ 6 files changed, 593 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index ebe9e9b762..e2fbfb4211 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -16,3 +16,4 @@ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += testing_io.c
+ romstage-y += timings_refresh.c
++romstage-y += train_receive_enable.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 5e4674957d..7d444659c3 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -60,6 +60,7 @@ static const struct task_entry cold_boot[] = {
+ { configure_memory_map, true, "MEMMAP", },
+ { do_jedec_init, true, "JEDECINIT", },
+ { pre_training, true, "PRETRAIN", },
++ { train_receive_enable, true, "RCVET", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 7c1a786780..a36ebfacd1 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -42,6 +42,9 @@
+ #define NUM_WDB_CL_MUX_SEEDS 3
+ #define NUM_CADB_MUX_SEEDS 3
+
++/* Specified in PI ticks. 64 PI ticks == 1 qclk */
++#define tDQSCK_DRIFT 64
++
+ /* ZQ calibration types */
+ enum {
+ ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
+@@ -188,6 +191,7 @@ enum raminit_status {
+ RAMINIT_STATUS_MPLL_INIT_FAILURE,
+ RAMINIT_STATUS_POLL_TIMEOUT,
+ RAMINIT_STATUS_REUT_ERROR,
++ RAMINIT_STATUS_RCVEN_FAILURE,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -270,6 +274,10 @@ struct sysinfo {
+
+ union ddr_data_vref_adjust_reg dimm_vref;
+
++ uint8_t io_latency[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint8_t rt_latency[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint32_t rt_io_comp[NUM_CHANNELS];
++
+ uint32_t data_offset_train[NUM_CHANNELS][NUM_LANES];
+ uint32_t data_offset_comp[NUM_CHANNELS][NUM_LANES];
+
+@@ -344,6 +352,11 @@ static inline void clear_data_offset_train_all(struct sysinfo *ctrl)
+ memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train));
+ }
+
++static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint8_t byte)
++{
++ return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte));
++}
++
+ /* Number of ticks to wait in units of 69.841279 ns (citation needed) */
+ static inline void tick_delay(const uint32_t delay)
+ {
+@@ -401,6 +414,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl);
+ enum raminit_status configure_mc(struct sysinfo *ctrl);
+ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
+ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
++enum raminit_status train_receive_enable(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index b943259b91..b099f4bb82 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -297,6 +297,19 @@ union ddr_scram_misc_control_reg {
+ uint32_t raw;
+ };
+
++union sc_io_latency_reg {
++ struct __packed {
++ uint32_t iolat_rank0 : 4; // Bits 3:0
++ uint32_t iolat_rank1 : 4; // Bits 7:4
++ uint32_t iolat_rank2 : 4; // Bits 11:8
++ uint32_t iolat_rank3 : 4; // Bits 15:12
++ uint32_t rt_iocomp : 6; // Bits 21:16
++ uint32_t : 9; // Bits 30:22
++ uint32_t dis_rt_clk_gate : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
+ union mcscheds_cbit_reg {
+ struct __packed {
+ uint32_t dis_opp_cas : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
+new file mode 100644
+index 0000000000..576c6bc21e
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
+@@ -0,0 +1,561 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++#include "ranges.h"
++
++#define RCVEN_PLOT RAM_DEBUG
++
++static enum raminit_status change_rcven_timing(struct sysinfo *ctrl, const uint8_t channel)
++{
++ int16_t max_rcven = -4096;
++ int16_t min_rcven = 4096;
++ int16_t max_rcven_rank[NUM_SLOTRANKS];
++ int16_t min_rcven_rank[NUM_SLOTRANKS];
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ max_rcven_rank[rank] = max_rcven;
++ min_rcven_rank[rank] = min_rcven;
++ }
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ int16_t new_rcven = ctrl->rcven[channel][rank][byte];
++ new_rcven -= ctrl->io_latency[channel][rank] * 64;
++ if (max_rcven_rank[rank] < new_rcven)
++ max_rcven_rank[rank] = new_rcven;
++
++ if (min_rcven_rank[rank] > new_rcven)
++ min_rcven_rank[rank] = new_rcven;
++ }
++ if (max_rcven < max_rcven_rank[rank])
++ max_rcven = max_rcven_rank[rank];
++
++ if (min_rcven > min_rcven_rank[rank])
++ min_rcven = min_rcven_rank[rank];
++ }
++
++ /*
++ * Determine how far we are from the ideal center point for RcvEn timing.
++ * (PiIdeal - AveRcvEn) / 64 is the ideal number of cycles we should have
++ * for IO latency. command training will reduce this by 64, so plan for
++ * that now in the ideal value. Round to closest integer.
++ */
++ const int16_t rre_pi_ideal = 256 + 64;
++ const int16_t pi_reserve = 64;
++ const int16_t rcven_center = (max_rcven + min_rcven) / 2;
++ const int8_t iolat_target = DIV_ROUND_CLOSEST(rre_pi_ideal - rcven_center, 64);
++
++ int8_t io_g_offset = 0;
++ int8_t io_lat[NUM_SLOTRANKS] = { 0 };
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ io_lat[rank] = iolat_target;
++
++ /* Check for RcvEn underflow/overflow */
++ const int16_t rcven_lower = 64 * io_lat[rank] + min_rcven_rank[rank];
++ if (rcven_lower < pi_reserve)
++ io_lat[rank] += DIV_ROUND_UP(pi_reserve - rcven_lower, 64);
++
++ const int16_t rcven_upper = 64 * io_lat[rank] + max_rcven_rank[rank];
++ if (rcven_upper > 511 - pi_reserve)
++ io_lat[rank] -= DIV_ROUND_UP(rcven_upper - (511 - pi_reserve), 64);
++
++ /* Check for IO latency over/underflow */
++ if (io_lat[rank] - io_g_offset > 14)
++ io_g_offset = io_lat[rank] - 14;
++
++ if (io_lat[rank] - io_g_offset < 1)
++ io_g_offset = io_lat[rank] - 1;
++
++ const int8_t cycle_offset = io_lat[rank] - ctrl->io_latency[channel][rank];
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ ctrl->rcven[channel][rank][byte] += 64 * cycle_offset;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ }
++
++ /* Calculate new IO comp latency */
++ union sc_io_latency_reg sc_io_lat = {
++ .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
++ };
++
++ /* Check if we are underflowing or overflowing this field */
++ if (io_g_offset < 0 && sc_io_lat.rt_iocomp < -io_g_offset) {
++ printk(BIOS_ERR, "%s: IO COMP underflow\n", __func__);
++ printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
++ printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ if (io_g_offset > 0 && io_g_offset > 0x3f - sc_io_lat.rt_iocomp) {
++ printk(BIOS_ERR, "%s: IO COMP overflow\n", __func__);
++ printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
++ printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ sc_io_lat.rt_iocomp += io_g_offset;
++ ctrl->rt_io_comp[channel] = sc_io_lat.rt_iocomp;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (ctrl->rankmap[channel] & BIT(rank))
++ ctrl->io_latency[channel][rank] = io_lat[rank] - io_g_offset;
++
++ const uint8_t shift = rank * 4;
++ sc_io_lat.raw &= ~(0xf << shift);
++ sc_io_lat.raw |= ctrl->io_latency[channel][rank] << shift;
++ }
++ mchbar_write32(SC_IO_LATENCY_ch(channel), sc_io_lat.raw);
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++#define RL_START (256 + 24)
++#define RL_STOP (384 + 24)
++#define RL_STEP 8
++
++#define RE_NUM_SAMPLES 6
++
++static enum raminit_status verify_high_region(const int32_t center, const int32_t lwidth)
++{
++ if (center > RL_STOP) {
++ /* Check if center of high was found where it should be */
++ printk(BIOS_ERR, "RcvEn: Center of high (%d) higher than expected\n", center);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ if (lwidth <= 32) {
++ /* Check if width is large enough */
++ printk(BIOS_ERR, "RcvEn: Width of high region (%d) too small\n", lwidth);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ if (lwidth >= 96) {
++ /* Since we're calibrating a phase, a too large region is a problem */
++ printk(BIOS_ERR, "RcvEn: Width of high region (%d) too large\n", lwidth);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++static void program_io_latency(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank)
++{
++ const uint8_t shift = rank * 4;
++ const uint8_t iolat = ctrl->io_latency[channel][rank];
++ mchbar_clrsetbits32(SC_IO_LATENCY_ch(channel), 0xf << shift, iolat << shift);
++}
++
++static void program_rl_delays(struct sysinfo *ctrl, const uint8_t rank, const uint16_t rl_delay)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ update_rxt(ctrl, channel, rank, byte, RXT_RCVEN, rl_delay);
++ }
++}
++
++static bool sample_dqs(const uint8_t channel, const uint8_t byte)
++{
++ return (get_data_train_feedback(channel, byte) & 0x1ff) >= BIT(RE_NUM_SAMPLES - 1);
++}
++
++enum raminit_status train_receive_enable(struct sysinfo *ctrl)
++{
++ const struct reut_box reut_addr = {
++ .col = {
++ .start = 0,
++ .stop = 1023,
++ .inc_rate = 0,
++ .inc_val = 1,
++ },
++ };
++ const struct wdb_pat wdb_pattern = {
++ .start_ptr = 0,
++ .stop_ptr = 9,
++ .inc_rate = 32,
++ .dq_pattern = BASIC_VA,
++ };
++
++ const uint16_t bytemask = BIT(ctrl->lanes) - 1;
++ const uint8_t fine_step = 1;
++
++ const uint8_t rt_delta = is_hsw_ult() ? 4 : 2;
++ const uint8_t rt_io_comp = 21 + rt_delta;
++ const uint8_t rt_latency = 16 + rt_delta;
++ setup_io_test(
++ ctrl,
++ ctrl->chanmap,
++ PAT_RD,
++ 2,
++ RE_NUM_SAMPLES + 1,
++ &reut_addr,
++ 0,
++ &wdb_pattern,
++ 0,
++ 8);
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_2_reg data_control_2 = {
++ .raw = ctrl->dq_control_2[channel][byte],
++ };
++ data_control_2.force_rx_on = 1;
++ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
++ }
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ if (ctrl->lpddr) {
++ /**
++ * W/A for b4618574 - @todo: remove for HSW ULT C0
++ * Can't have force_odt_on together with leaker, disable LPDDR
++ * mode during this training step. lpddr_mode is restored
++ * at the end of this function from the host structure.
++ */
++ data_control_0.lpddr_mode = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ }
++ data_control_0.force_odt_on = 1;
++ data_control_0.rl_training_mode = 1;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ mchbar_write32(SC_IO_LATENCY_ch(channel), (union sc_io_latency_reg) {
++ .rt_iocomp = rt_io_comp,
++ }.raw);
++ }
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!does_rank_exist(ctrl, rank))
++ continue;
++
++ /*
++ * Set initial roundtrip latency values. Assume -4 QCLK for worst board
++ * layout. This is calculated as HW_ROUNDT_LAT_DEFAULT_VALUE plus:
++ *
++ * DDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + N-mode value * 2
++ * LPDDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + tDQSCK_max
++ *
++ * N-mode is 3 during training mode. Both channels use the same timings.
++ */
++ /** TODO: differs for LPDDR **/
++ const uint32_t tmp = MAX(ctrl->multiplier, 4) + 5 + 2 * ctrl->tAA;
++ const uint32_t initial_rt_latency = MIN(rt_latency + tmp, 0x3f);
++
++ uint8_t chanmask = 0;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ ctrl->io_latency[channel][rank] = 0;
++ mchbar_write8(SC_ROUNDT_LAT_ch(channel) + rank, initial_rt_latency);
++ ctrl->rt_latency[channel][rank] = initial_rt_latency;
++ }
++
++ printk(BIOS_DEBUG, "Rank %u\n", rank);
++ printk(BIOS_DEBUG, "Steps 1 and 2: Find middle of high region\n");
++ printk(RCVEN_PLOT, "Byte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(RCVEN_PLOT, "%u ", byte);
++ }
++ printk(RCVEN_PLOT, "\nRcvEn\n");
++ struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
++ for (uint16_t rl_delay = RL_START; rl_delay < RL_STOP; rl_delay += RL_STEP) {
++ printk(RCVEN_PLOT, " % 3d", rl_delay);
++ program_rl_delays(ctrl, rank, rl_delay);
++ run_io_test(ctrl, chanmask, BASIC_VA, true);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const bool high = sample_dqs(channel, byte);
++ printk(RCVEN_PLOT, high ? ". " : "# ");
++ phase_record_pass(
++ &region_data[channel][byte],
++ high,
++ rl_delay,
++ RL_START,
++ RL_STEP);
++ }
++ }
++ printk(RCVEN_PLOT, "\n");
++ }
++ printk(RCVEN_PLOT, "\n");
++ printk(BIOS_DEBUG, "Update RcvEn timing to be in the center of high region\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\n",
++ channel, rank);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct phase_train_data *const curr_data =
++ &region_data[channel][byte];
++ phase_append_current_to_initial(curr_data, RL_START, RL_STEP);
++ const int32_t lwidth = range_width(curr_data->largest);
++ const int32_t center = range_center(curr_data->largest);
++ printk(BIOS_DEBUG, " B%u: \t%d\t%d\t%d\t%d\n",
++ byte,
++ curr_data->largest.start,
++ curr_data->largest.end,
++ lwidth,
++ center);
++
++ status = verify_high_region(center, lwidth);
++ if (status) {
++ printk(BIOS_ERR,
++ "RcvEn problems on channel %u, byte %u\n",
++ channel, byte);
++ goto clean_up;
++ }
++ ctrl->rcven[channel][rank][byte] = center;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++
++ printk(BIOS_DEBUG, "Step 3: Quarter preamble - Walk backwards\n");
++ printk(RCVEN_PLOT, "Byte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(RCVEN_PLOT, "%u ", byte);
++ }
++ printk(RCVEN_PLOT, "\nIOLAT\n");
++ bool done = false;
++ while (!done) {
++ run_io_test(ctrl, chanmask, BASIC_VA, true);
++ done = true;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, " %2u\t", ctrl->io_latency[channel][rank]);
++ uint16_t highs = 0;
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const bool high = sample_dqs(channel, byte);
++ printk(RCVEN_PLOT, high ? "H " : "L ");
++ if (high)
++ highs |= BIT(byte);
++ }
++ if (!highs)
++ continue;
++
++ done = false;
++
++ /* If all bytes sample high, adjust timing globally */
++ if (highs == bytemask && ctrl->io_latency[channel][rank] < 14) {
++ ctrl->io_latency[channel][rank] += 2;
++ ctrl->io_latency[channel][rank] %= 16;
++ program_io_latency(ctrl, channel, rank);
++ continue;
++ }
++
++ /* Otherwise, adjust individual bytes */
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ if (!(highs & BIT(byte)))
++ continue;
++
++ if (ctrl->rcven[channel][rank][byte] < 128) {
++ printk(BIOS_ERR,
++ "RcvEn underflow: walking backwards\n");
++ printk(BIOS_ERR,
++ "For channel %u, rank %u, byte %u\n",
++ channel, rank, byte);
++ status = RAMINIT_STATUS_RCVEN_FAILURE;
++ goto clean_up;
++ }
++ ctrl->rcven[channel][rank][byte] -= 128;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ }
++ printk(RCVEN_PLOT, "\n");
++ }
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "\nC%u: Preamble\n", channel);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ printk(BIOS_DEBUG,
++ " B%u: %u\n", byte, ctrl->rcven[channel][rank][byte]);
++ }
++ }
++ printk(BIOS_DEBUG, "\n");
++
++ printk(BIOS_DEBUG, "Step 4: Add 1 qclk\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ ctrl->rcven[channel][rank][byte] += 64;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ }
++ printk(BIOS_DEBUG, "\n");
++
++ printk(BIOS_DEBUG, "Step 5: Walk forward to find rising edge\n");
++ printk(RCVEN_PLOT, "Byte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(RCVEN_PLOT, "%u ", byte);
++ }
++ printk(RCVEN_PLOT, "\n inc\n");
++ uint16_t ch_result[NUM_CHANNELS] = { 0 };
++ uint8_t inc_preamble[NUM_CHANNELS][NUM_LANES] = { 0 };
++ for (uint8_t inc = 0; inc < 64; inc += fine_step) {
++ printk(RCVEN_PLOT, " %2u\t", inc);
++ run_io_test(ctrl, chanmask, BASIC_VA, true);
++ done = true;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ if (ch_result[channel] & BIT(byte)) {
++ /* Skip bytes that are already done */
++ printk(RCVEN_PLOT, ". ");
++ continue;
++ }
++ const bool pass = sample_dqs(channel, byte);
++ printk(RCVEN_PLOT, pass ? ". " : "# ");
++ if (pass) {
++ ch_result[channel] |= BIT(byte);
++ continue;
++ }
++ ctrl->rcven[channel][rank][byte] += fine_step;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ inc_preamble[channel][byte] = inc;
++ }
++ printk(RCVEN_PLOT, "\t");
++ if (ch_result[channel] != bytemask)
++ done = false;
++ }
++ printk(RCVEN_PLOT, "\n");
++ if (done)
++ break;
++ }
++ printk(BIOS_DEBUG, "\n");
++ if (!done) {
++ printk(BIOS_ERR, "Error: Preamble edge not found for all bytes\n");
++ printk(BIOS_ERR, "The final RcvEn results are as follows:\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_ERR, "Channel %u Rank %u: preamble\n",
++ channel, rank);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ /* ch_result has a bit set for every byte that found its edge */
++ printk(BIOS_ERR, " Byte %u: %u%s\n", byte,
++ ctrl->rcven[channel][rank][byte],
++ ch_result[channel] & BIT(byte)
++ ? "" : " *** Check this byte! ***");
++ }
++ }
++ status = RAMINIT_STATUS_RCVEN_FAILURE;
++ goto clean_up;
++ }
++
++ printk(BIOS_DEBUG, "Step 6: center on preamble and clean up rank\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "C%u: Preamble increment\n", channel);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ /*
++ * For Traditional, pull in RcvEn by 64. For ULT, take the DQS
++ * drift into account to the specified guardband: tDQSCK_DRIFT.
++ */
++ ctrl->rcven[channel][rank][byte] -= tDQSCK_DRIFT;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ printk(BIOS_DEBUG, " B%u: %u %u\n", byte,
++ ctrl->rcven[channel][rank][byte],
++ inc_preamble[channel][byte]);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++
++clean_up:
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ if (ctrl->lpddr) {
++ /**
++ * W/A for b4618574 - @todo: remove for HSW ULT C0
++ * Can't have force_odt_on together with leaker, disable LPDDR mode for
++ * this training step. This write will disable force_odt_on while still
++ * keeping LPDDR mode disabled. Second write will restore LPDDR mode.
++ */
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.lpddr_mode = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ }
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ mchbar_write32(DQ_CONTROL_2(channel, byte),
++ ctrl->dq_control_2[channel][byte]);
++ }
++ }
++ io_reset();
++ if (status)
++ return status;
++
++ printk(BIOS_DEBUG, "Step 7: Sync IO latency across all ranks\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ status = change_rcven_timing(ctrl, channel);
++ if (status)
++ return status;
++ }
++ printk(BIOS_DEBUG, "\nFinal Receive Enable and IO latency settings:\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ const union sc_io_latency_reg sc_io_latency = {
++ .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
++ };
++ printk(BIOS_DEBUG, " C%u.R%u: IOLAT = %u rt_iocomp = %u\n", channel,
++ rank, ctrl->io_latency[channel][rank], sc_io_latency.rt_iocomp);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ printk(BIOS_DEBUG, " B%u: %u\n", byte,
++ ctrl->rcven[channel][rank][byte]);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ }
++ return status;
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index a81559bb1e..9172d4f2b0 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -18,6 +18,8 @@
+ #define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
+ #define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
+
++#define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
++
+ #define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
+ #define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
+
+@@ -100,6 +102,7 @@
+ #define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
+ #define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
+ #define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
++#define SC_IO_LATENCY_ch(ch) _MCMAIN_C(0x4028, ch)
+
+ #define REUT_ch_PAT_WDB_CL_MUX_CFG(ch) _MCMAIN_C(0x4040, ch)
+
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch b/resources/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch
new file mode 100644
index 00000000..2e6de17c
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch
@@ -0,0 +1,272 @@
+From 8c3874195c0fc1af9d0b84611496689da1c19d8c Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 11:58:59 +0200
+Subject: [PATCH 22/26] haswell NRI: Add function to change margins
+
+Implement a function to change margin parameters. Haswell provides a
+register to apply an offset to margin parameters during training, so
+make use of it. There are other margin parameters that have not been
+implemented yet, as they are not needed for now and special handling
+is needed to provide offset training functionality.
+
+Change-Id: I5392380e13de3c44e77b7bc9f3b819e2661d1e2d
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/change_margin.c | 136 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_native.h | 39 +++++
+ .../haswell/native_raminit/reg_structs.h | 12 ++
+ .../intel/haswell/registers/mchbar.h | 1 +
+ 4 files changed, 188 insertions(+)
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/change_margin.c b/src/northbridge/intel/haswell/native_raminit/change_margin.c
+index 12da59580f..4ba9cfa5c6 100644
+--- a/src/northbridge/intel/haswell/native_raminit/change_margin.c
++++ b/src/northbridge/intel/haswell/native_raminit/change_margin.c
+@@ -1,5 +1,6 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
++#include <assert.h>
+ #include <commonlib/clamp.h>
+ #include <console/console.h>
+ #include <delay.h>
+@@ -152,3 +153,138 @@ void download_regfile(
+ ddr_data_control_0.read_rf_rank = phys_rank;
+ mchbar_write32(reg, ddr_data_control_0.raw);
+ }
++
++static void update_data_offset_train(
++ struct sysinfo *ctrl,
++ const uint8_t param,
++ const uint8_t en_multicast,
++ const uint8_t channel_in,
++ const uint8_t rank,
++ const uint8_t byte_in,
++ const bool update_ctrl,
++ const enum regfile_mode regfile,
++ const uint32_t value)
++{
++ bool is_rd = false;
++ bool is_wr = false;
++ switch (param) {
++ case RdT:
++ case RdV:
++ case RcvEna:
++ is_rd = true;
++ break;
++ case WrT:
++ case WrDqsT:
++ is_wr = true;
++ break;
++ default:
++ die("%s: Invalid margin parameter %u\n", __func__, param);
++ }
++ if (en_multicast) {
++ mchbar_write32(DDR_DATA_OFFSET_TRAIN, value);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ download_regfile(ctrl, channel, true, rank, regfile, 0, is_rd, is_wr);
++ if (update_ctrl) {
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ ctrl->data_offset_train[channel][byte] = value;
++ }
++ }
++ } else {
++ mchbar_write32(DDR_DATA_OFFSET_TRAIN_ch_b(channel_in, byte_in), value);
++ download_regfile(ctrl, channel_in, false, rank, regfile, byte_in, is_rd, is_wr);
++ if (update_ctrl)
++ ctrl->data_offset_train[channel_in][byte_in] = value;
++ }
++}
++
++static uint32_t get_max_margin(const enum margin_parameter param)
++{
++ switch (param) {
++ case RcvEna:
++ case RdT:
++ case WrT:
++ case WrDqsT:
++ return MAX_POSSIBLE_TIME;
++ case RdV:
++ return MAX_POSSIBLE_VREF;
++ default:
++ die("%s: Invalid margin parameter %u\n", __func__, param);
++ }
++}
++
++void change_margin(
++ struct sysinfo *ctrl,
++ const enum margin_parameter param,
++ const int32_t value0,
++ const bool en_multicast,
++ const uint8_t channel,
++ const uint8_t rank,
++ const uint8_t byte,
++ const bool update_ctrl,
++ const enum regfile_mode regfile)
++{
++ /** FIXME: Remove this **/
++ if (rank == 0xff)
++ die("%s: rank is 0xff\n", __func__);
++
++ if (!en_multicast && !does_ch_exist(ctrl, channel))
++ die("%s: Tried to change margin of empty channel %u\n", __func__, channel);
++
++ const int32_t max_value = get_max_margin(param);
++ const int32_t v0 = clamp_s32(-max_value, value0, max_value);
++
++ union ddr_data_offset_train_reg ddr_data_offset_train = {
++ .raw = en_multicast ? 0 : ctrl->data_offset_train[channel][byte],
++ };
++ bool update_offset_train = false;
++ switch (param) {
++ case RcvEna:
++ ddr_data_offset_train.rcven = v0;
++ update_offset_train = true;
++ break;
++ case RdT:
++ ddr_data_offset_train.rx_dqs = v0;
++ update_offset_train = true;
++ break;
++ case WrT:
++ ddr_data_offset_train.tx_dq = v0;
++ update_offset_train = true;
++ break;
++ case WrDqsT:
++ ddr_data_offset_train.tx_dqs = v0;
++ update_offset_train = true;
++ break;
++ case RdV:
++ ddr_data_offset_train.vref = v0;
++ update_offset_train = true;
++ break;
++ default:
++ die("%s: Invalid margin parameter %u\n", __func__, param);
++ }
++ if (update_offset_train) {
++ update_data_offset_train(
++ ctrl,
++ param,
++ en_multicast,
++ channel,
++ rank,
++ byte,
++ update_ctrl,
++ regfile,
++ ddr_data_offset_train.raw);
++ }
++}
++
++void change_1d_margin_multicast(
++ struct sysinfo *ctrl,
++ const enum margin_parameter param,
++ const int32_t value0,
++ const uint8_t rank,
++ const bool update_ctrl,
++ const enum regfile_mode regfile)
++{
++ change_margin(ctrl, param, value0, true, 0, rank, 0, update_ctrl, regfile);
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index a36ebfacd1..500fc28909 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -35,6 +35,18 @@
+
+ #define RTTNOM_MASK (BIT(9) | BIT(6) | BIT(2))
+
++/* Margin parameter limits */
++#define MAX_POSSIBLE_TIME 31
++#define MAX_POSSIBLE_VREF 54
++
++#define MAX_POSSIBLE_BOTH MAX_POSSIBLE_VREF
++
++#define MIN_TIME (-MAX_POSSIBLE_TIME)
++#define MAX_TIME (MAX_POSSIBLE_TIME)
++
++#define MIN_VREF (-MAX_POSSIBLE_VREF)
++#define MAX_VREF (MAX_POSSIBLE_VREF)
++
+ #define BASIC_VA_PAT_SPREAD_8 0x01010101
+
+ #define WDB_CACHE_LINE_SIZE 8
+@@ -45,6 +57,14 @@
+ /* Specified in PI ticks. 64 PI ticks == 1 qclk */
+ #define tDQSCK_DRIFT 64
+
++enum margin_parameter {
++ RcvEna,
++ RdT,
++ WrT,
++ WrDqsT,
++ RdV,
++};
++
+ /* ZQ calibration types */
+ enum {
+ ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
+@@ -516,6 +536,25 @@ void download_regfile(
+ bool read_rf_rd,
+ bool read_rf_wr);
+
++void change_margin(
++ struct sysinfo *ctrl,
++ const enum margin_parameter param,
++ const int32_t value0,
++ const bool en_multicast,
++ const uint8_t channel,
++ const uint8_t rank,
++ const uint8_t byte,
++ const bool update_ctrl,
++ const enum regfile_mode regfile);
++
++void change_1d_margin_multicast(
++ struct sysinfo *ctrl,
++ const enum margin_parameter param,
++ const int32_t value0,
++ const uint8_t rank,
++ const bool update_ctrl,
++ const enum regfile_mode regfile);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index b099f4bb82..a0e36ed082 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -25,6 +25,18 @@ union ddr_data_tx_train_rank_reg {
+ uint32_t raw;
+ };
+
++union ddr_data_offset_train_reg {
++ struct __packed {
++ int32_t rcven : 6; // Bits 5:0
++ int32_t rx_dqs : 6; // Bits 11:6
++ int32_t tx_dq : 6; // Bits 17:12
++ int32_t tx_dqs : 6; // Bits 23:18
++ int32_t vref : 7; // Bits 30:24
++ int32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
+ union ddr_data_control_0_reg {
+ struct __packed {
+ uint32_t rx_training_mode : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 9172d4f2b0..0acafbc826 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -21,6 +21,7 @@
+ #define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
+
+ #define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
++#define DDR_DATA_OFFSET_TRAIN_ch_b(ch, byte) _DDRIO_C_R_B(0x0070, ch, 0, byte)
+ #define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
+
+ /* DDR CKE per-channel */
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch b/resources/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch
new file mode 100644
index 00000000..b13eb2db
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch
@@ -0,0 +1,331 @@
+From 6781cec818501f7afd6ee26464fd4556ac3068cb Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 11:35:49 +0200
+Subject: [PATCH 23/26] haswell NRI: Add read MPR training
+
+Implement read training using DDR3 MPR (Multi-Purpose Register).
+
+Change-Id: Id17cb2c4c399ac9bcc937b595b58f863c152461b
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 4 +
+ .../haswell/native_raminit/train_read_mpr.c | 240 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 2 +-
+ 5 files changed, 247 insertions(+), 1 deletion(-)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/train_read_mpr.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index e2fbfb4211..c442be0728 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -16,4 +16,5 @@ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += testing_io.c
+ romstage-y += timings_refresh.c
++romstage-y += train_read_mpr.c
+ romstage-y += train_receive_enable.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 7d444659c3..264d1468f5 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -61,6 +61,7 @@ static const struct task_entry cold_boot[] = {
+ { do_jedec_init, true, "JEDECINIT", },
+ { pre_training, true, "PRETRAIN", },
+ { train_receive_enable, true, "RCVET", },
++ { train_read_mpr, true, "RDMPRT", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 500fc28909..a7551ad63c 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -27,6 +27,8 @@
+ /* Always use 12 legs for emphasis (not trained) */
+ #define TXEQFULLDRV (3 << 4)
+
++#define LOOPCOUNT_INFINITE 0xff
++
+ /* DDR3 mode register bits */
+ #define MR0_DLL_RESET BIT(8)
+
+@@ -212,6 +214,7 @@ enum raminit_status {
+ RAMINIT_STATUS_POLL_TIMEOUT,
+ RAMINIT_STATUS_REUT_ERROR,
+ RAMINIT_STATUS_RCVEN_FAILURE,
++ RAMINIT_STATUS_RMPR_FAILURE,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -435,6 +438,7 @@ enum raminit_status configure_mc(struct sysinfo *ctrl);
+ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
+ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+ enum raminit_status train_receive_enable(struct sysinfo *ctrl);
++enum raminit_status train_read_mpr(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c b/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c
+new file mode 100644
+index 0000000000..0225e1a384
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c
+@@ -0,0 +1,240 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++#include "ranges.h"
++
++#define RMPR_START (-32)
++#define RMPR_STOP (32)
++#define RMPR_STEP 1
++
++#define RMPR_MIN_WIDTH 12
++
++#define RMPR_PLOT RAM_DEBUG
++
++/*
++ * Take one channel out of RX training mode by restoring the cached DQ control
++ * values. On LPDDR, odt_samp_extend_en is cleared before rx_training_mode is.
++ */
++static void clear_rx_training_mode(struct sysinfo *ctrl, const uint8_t channel)
++{
++ const uint32_t ctl0_reg = DDR_DATA_ch_CONTROL_0(channel);
++ for (uint8_t lane = 0; lane < ctrl->lanes; lane++)
++ mchbar_write32(DQ_CONTROL_2(channel, lane), ctrl->dq_control_2[channel][lane]);
++
++ if (ctrl->lpddr) {
++ union ddr_data_control_0_reg ctl0 = { .raw = mchbar_read32(ctl0_reg) };
++ ctl0.odt_samp_extend_en = 0;
++ mchbar_write32(ctl0_reg, ctl0.raw);
++ tick_delay(1);
++ ctl0.rx_training_mode = 0;
++ mchbar_write32(ctl0_reg, ctl0.raw);
++ tick_delay(1);
++ }
++ /* The final write puts the cached register value back */
++ mchbar_write32(ctl0_reg, ctrl->dq_control_0[channel]);
++}
++
++static void set_rxdqs_edges_to_midpoint(struct sysinfo *ctrl)
++{
++ /* Set RXT_RXDQS_BOTH to 32 on every byte of every rank in an existing channel */
++ for (uint8_t ch = 0; ch < NUM_CHANNELS; ch++) {
++ if (does_ch_exist(ctrl, ch)) {
++ for (uint8_t rk = 0; rk < NUM_SLOTRANKS; rk++) {
++ if (rank_in_ch(ctrl, rk, ch)) {
++ for (uint8_t lane = 0; lane < ctrl->lanes; lane++) {
++ update_rxt(ctrl, ch, rk, lane, RXT_RXDQS_BOTH, 32);
++ }
++ }
++ }
++ }
++ }
++}
++
++static void enter_mpr_train_ddr_mode(struct sysinfo *ctrl, const uint8_t rank)
++{
++ /* Program MR3 and mask RAS/WE so that the scheduler cannot issue non-read commands */
++ for (uint8_t ch = 0; ch < NUM_CHANNELS; ch++) {
++ const uint32_t odt_ctrl_reg = REUT_ch_MISC_ODT_CTRL(ch);
++ if (!rank_in_ch(ctrl, rank, ch))
++ continue;
++
++ if (!ctrl->lpddr)
++ reut_issue_mrs(ctrl, ch, BIT(rank), 3, 1 << 2);
++
++ /* Read-modify-write: only mpr_train_ddr_on is changed */
++ union reut_misc_odt_ctrl_reg odt_ctrl = { .raw = mchbar_read32(odt_ctrl_reg) };
++ odt_ctrl.mpr_train_ddr_on = 1;
++ mchbar_write32(odt_ctrl_reg, odt_ctrl.raw);
++ }
++}
++
++static void leave_mpr_train_ddr_mode(struct sysinfo *ctrl, const uint8_t rank)
++{
++ for (uint8_t ch = 0; ch < NUM_CHANNELS; ch++) {
++ const uint32_t odt_ctrl_reg = REUT_ch_MISC_ODT_CTRL(ch);
++ if (!rank_in_ch(ctrl, rank, ch))
++ continue;
++
++ /*
++ * mpr_train_ddr_on forces a special command, so it has to be
++ * cleared first; only then may the MRS command be issued.
++ */
++ union reut_misc_odt_ctrl_reg odt_ctrl = { .raw = mchbar_read32(odt_ctrl_reg) };
++ odt_ctrl.mpr_train_ddr_on = 0;
++ mchbar_write32(odt_ctrl_reg, odt_ctrl.raw);
++ if (ctrl->lpddr)
++ continue;
++ reut_issue_mrs(ctrl, ch, BIT(rank), 3, 0 << 2);
++ }
++}
++
++enum raminit_status train_read_mpr(struct sysinfo *ctrl)
++{
++ set_rxdqs_edges_to_midpoint(ctrl); /* Every RxDQS edge starts at 32 */
++ clear_data_offset_train_all(ctrl);
++ setup_io_test_mpr(ctrl, ctrl->chanmap, LOOPCOUNT_INFINITE, NSOE);
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!does_rank_exist(ctrl, rank))
++ continue;
++
++ printk(BIOS_DEBUG, "Rank %u\n", rank);
++ printk(RMPR_PLOT, "Channel");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RMPR_PLOT, "\t%u\t\t", channel);
++ }
++ printk(RMPR_PLOT, "\nByte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RMPR_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(RMPR_PLOT, "%u ", byte);
++ }
++ enter_mpr_train_ddr_mode(ctrl, rank);
++ struct linear_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 }; /* Per rank */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
++ select_reut_ranks(ctrl, channel, BIT(rank));
++
++ printk(RMPR_PLOT, "\nDqsDelay\n");
++ int8_t dqs_delay;
++ for (dqs_delay = RMPR_START; dqs_delay < RMPR_STOP; dqs_delay += RMPR_STEP) { /* Sweep */
++ printk(RMPR_PLOT, "% 5d", dqs_delay);
++ const enum regfile_mode regfile = REG_FILE_USE_START;
++ change_1d_margin_multicast(ctrl, RdT, dqs_delay, 0, false, regfile);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_2_reg data_control_2 = {
++ .raw = ctrl->dq_control_2[channel][byte],
++ };
++ data_control_2.force_bias_on = 1;
++ data_control_2.force_rx_on = 1;
++ data_control_2.leaker_comp = 0;
++ mchbar_write32(DQ_CONTROL_2(channel, byte),
++ data_control_2.raw);
++ }
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.rx_training_mode = 1;
++ data_control_0.force_odt_on = !ctrl->lpddr;
++ data_control_0.en_read_preamble = 0;
++ data_control_0.odt_samp_extend_en = ctrl->lpddr;
++ const uint32_t reg_offset = DDR_DATA_ch_CONTROL_0(channel);
++ mchbar_write32(reg_offset, data_control_0.raw);
++ }
++ run_mpr_io_test(false);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RMPR_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ uint32_t fb = get_data_train_feedback(channel, byte);
++ const bool pass = fb == 1; /* Only a feedback value of exactly 1 passes */
++ printk(RMPR_PLOT, pass ? ". " : "# ");
++ linear_record_pass(
++ &region_data[channel][byte],
++ pass,
++ dqs_delay,
++ RMPR_START,
++ RMPR_STEP);
++ }
++ }
++ printk(RMPR_PLOT, "\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ clear_rx_training_mode(ctrl, channel);
++ }
++ io_reset();
++ }
++ printk(RMPR_PLOT, "\n");
++ leave_mpr_train_ddr_mode(ctrl, rank);
++ clear_data_offset_train_all(ctrl);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\tRxDqsPN\n",
++ channel, rank);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct linear_train_data *data = &region_data[channel][byte];
++ const int32_t lwidth = range_width(data->largest);
++ if (lwidth <= RMPR_MIN_WIDTH) { /* Flag the failure, but keep processing */
++ printk(BIOS_ERR,
++ "Bad eye (lwidth %d <= min %d) for byte %u\n",
++ lwidth, RMPR_MIN_WIDTH, byte);
++ status = RAMINIT_STATUS_RMPR_FAILURE;
++ }
++ /*
++ * The MPR center may not be ideal on certain platforms for
++ * unknown reasons. If so, adjust it with a magical number.
++ * For Haswell, the magical number is zero. Hell knows why.
++ */
++ const int32_t center = range_center(data->largest);
++ ctrl->rxdqsp[channel][rank][byte] = center - RMPR_START; /* Rebase by the sweep start */
++ ctrl->rxdqsn[channel][rank][byte] = center - RMPR_START; /* Same value for both edges */
++ printk(BIOS_DEBUG, " B%u: \t%d\t%d\t%d\t%d\t%u\n", byte,
++ data->largest.start, data->largest.end, lwidth,
++ center, ctrl->rxdqsp[channel][rank][byte]);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ }
++
++ /*
++ * Now program the DQS center values on populated ranks. Data is taken from
++ * the host struct. We need to do it after all ranks are trained, because we
++ * need to keep the same DQS value on all ranks during the training procedure.
++ */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ }
++ change_1d_margin_multicast(ctrl, RdT, 0, 0, false, REG_FILE_USE_CURRENT);
++ io_reset();
++ return status; /* RMPR_FAILURE if any byte had too narrow an eye, else SUCCESS */
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 0acafbc826..6a31d3a32c 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -122,7 +122,7 @@
+ #define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
+
+ #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
+-
++#define REUT_ch_MISC_ODT_CTRL(ch) _MCMAIN_C(0x4194, ch)
+ #define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)
+ #define REUT_ch_PAT_CADB_MRS(ch) _MCMAIN_C(0x419c, ch)
+ #define REUT_ch_PAT_CADB_MUX_CTRL(ch) _MCMAIN_C(0x41a0, ch)
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch b/resources/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch
new file mode 100644
index 00000000..59e9af9d
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch
@@ -0,0 +1,688 @@
+From 20fe4fa852d3e13851a01b51dc984ec5976c864e Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 12:56:04 +0200
+Subject: [PATCH 24/26] haswell NRI: Add write leveling
+
+Implement JEDEC write leveling, which is done in two steps. The first
+step uses the JEDEC procedure to do "fine" write leveling, i.e. align
+the DQS phase to the clock signal. The second step performs a regular
+read-write test to correct "coarse" cycle errors.
+
+Change-Id: I27678523fe22c38173a688e2a4751c259a20f009
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 10 +
+ .../train_jedec_write_leveling.c | 580 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 2 +
+ 5 files changed, 594 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index c442be0728..40c2f5e014 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -16,5 +16,6 @@ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += testing_io.c
+ romstage-y += timings_refresh.c
++romstage-y += train_jedec_write_leveling.c
+ romstage-y += train_read_mpr.c
+ romstage-y += train_receive_enable.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 264d1468f5..1ff23be615 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -62,6 +62,7 @@ static const struct task_entry cold_boot[] = {
+ { pre_training, true, "PRETRAIN", },
+ { train_receive_enable, true, "RCVET", },
+ { train_read_mpr, true, "RDMPRT", },
++ { train_jedec_write_leveling, true, "JWRL", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index a7551ad63c..666b233c45 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -59,6 +59,9 @@
+ /* Specified in PI ticks. 64 PI ticks == 1 qclk */
+ #define tDQSCK_DRIFT 64
+
++/* Maximum additional latency */
++#define MAX_ADD_DELAY 2
++
+ enum margin_parameter {
+ RcvEna,
+ RdT,
+@@ -215,6 +218,7 @@ enum raminit_status {
+ RAMINIT_STATUS_REUT_ERROR,
+ RAMINIT_STATUS_RCVEN_FAILURE,
+ RAMINIT_STATUS_RMPR_FAILURE,
++ RAMINIT_STATUS_JWRL_FAILURE,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -380,6 +384,11 @@ static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint
+ return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte));
+ }
+
++static inline uint16_t get_byte_group_errors(const uint8_t channel)
++{
++ return mchbar_read32(4 + REUT_ch_ERR_MISC_STATUS(channel)) & 0x1ff; /* Keep bits 8:0 */
++}
++
+ /* Number of ticks to wait in units of 69.841279 ns (citation needed) */
+ static inline void tick_delay(const uint32_t delay)
+ {
+@@ -439,6 +448,7 @@ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
+ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+ enum raminit_status train_receive_enable(struct sysinfo *ctrl);
+ enum raminit_status train_read_mpr(struct sysinfo *ctrl);
++enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
+new file mode 100644
+index 0000000000..1ba28a3bd4
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
+@@ -0,0 +1,580 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++#include "ranges.h"
++
++#define JWLC_PLOT RAM_DEBUG
++#define JWRL_PLOT RAM_DEBUG
++
++static void reset_dram_dll(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank)
++{
++ reut_issue_mrs(ctrl, channel, BIT(rank), 0, MR0_DLL_RESET | ctrl->mr0[channel][rank]);
++}
++
++static void program_wdb_pattern(struct sysinfo *ctrl, const bool invert)
++{
++ /* Simple DQ-DQS pattern that still detects any failures. Same as NHM/WSM. */
++ const uint8_t wdb_pat[][2] = {
++ { 0x00, 0xff }, { 0xff, 0x00 },
++ { 0xc3, 0x3c }, { 0x3c, 0xc3 },
++ };
++ const uint8_t wdb_mask[][8] = {
++ { 0, 0, 1, 1, 1, 1, 0, 0 }, /* invert == false */
++ { 1, 1, 0, 0, 0, 0, 1, 1 }, /* invert == true */
++ };
++ const uint8_t *const mask = wdb_mask[invert];
++ const size_t mask_len = ARRAY_SIZE(wdb_mask[0]);
++ for (uint8_t idx = 0; idx < ARRAY_SIZE(wdb_pat); idx++)
++ write_wdb_fixed_pat(ctrl, wdb_pat[idx], mask, mask_len, idx);
++}
++
++static int16_t set_add_delay(uint32_t *add_delay, uint8_t rank, int8_t target_off)
++{
++ /* Clamp target_off into the rank's 2-bit field; return the excess times 128 */
++ const uint8_t field_pos = rank * 2;
++ int8_t field_val = target_off;
++ int16_t excess = 0;
++ if (target_off > MAX_ADD_DELAY) {
++ field_val = MAX_ADD_DELAY;
++ excess = 128 * (target_off - MAX_ADD_DELAY);
++ } else if (target_off < 0) {
++ field_val = 0;
++ excess = 128 * target_off;
++ }
++ *add_delay &= ~(3 << field_pos);
++ *add_delay |= field_val << field_pos;
++ return excess;
++}
++
++/*
++ * Second write leveling step: correct coarse (cycle) errors for each rank.
++ * For every rank, sweep the candidate cycle offsets (and a few WrT margins)
++ * with a write-read test and record the first passing offset of each byte.
++ * The result is then split between the rank's SC_WR_ADD_DELAY field and the
++ * per-byte TxDQ/TxDQS values. Returns RAMINIT_STATUS_JWRL_FAILURE if a byte
++ * never passes or if the resulting delays cannot be made to fit.
++ */
++static enum raminit_status train_jedec_write_leveling_cleanup(struct sysinfo *ctrl)
++{
++ const struct reut_box reut_addr = {
++ .col = {
++ .start = 0,
++ .stop = 1023,
++ .inc_val = 1,
++ },
++ };
++ const struct wdb_pat wdb_pattern = {
++ .start_ptr = 0,
++ .stop_ptr = 3,
++ .inc_rate = 1,
++ .dq_pattern = BASIC_VA,
++ };
++ const int8_t offsets[] = { 0, 1, -1, 2, 3 };
++ const int8_t dq_offsets[] = { 0, -10, 10, -5, 5, -15, 15 };
++ const uint8_t dq_offset_max = ARRAY_SIZE(dq_offsets);
++
++ /* Set LFSR seeds to be sequential */
++ program_wdb_lfsr(ctrl, true);
++ setup_io_test(ctrl, ctrl->chanmap, PAT_WR_RD, 2, 4, &reut_addr, NSOE, &wdb_pattern,
++ 0, 0);
++
++ const union reut_pat_wdb_cl_mux_cfg_reg reut_wdb_cl_mux_cfg = {
++ .mux_0_control = REUT_MUX_BTBUFFER,
++ .mux_1_control = REUT_MUX_BTBUFFER,
++ .mux_2_control = REUT_MUX_BTBUFFER,
++ .ecc_data_source_sel = 1,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_CFG(channel), reut_wdb_cl_mux_cfg.raw);
++ }
++
++ int8_t byte_off[NUM_CHANNELS][NUM_LANES] = { 0 };
++ uint32_t add_delay[NUM_CHANNELS] = { 0 };
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ bool invert = false;
++ const uint16_t valid_byte_mask = BIT(ctrl->lanes) - 1;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ uint8_t chanmask = 0;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
++ chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
++
++ if (!chanmask)
++ continue;
++
++ printk(BIOS_DEBUG, "Rank %u\n", rank);
++ printk(JWLC_PLOT, "Channel");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(JWLC_PLOT, "\t\t%u\t", channel);
++ }
++ printk(JWLC_PLOT, "\nByte\t");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(JWLC_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(JWLC_PLOT, "%u ", byte);
++ }
++ printk(JWLC_PLOT, "\nDelay DqOffset");
++ bool done = false;
++ int8_t byte_sum[NUM_CHANNELS] = { 0 };
++ uint16_t byte_pass[NUM_CHANNELS] = { 0 };
++ for (uint8_t off = 0; off < ARRAY_SIZE(offsets); off++) {
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ const int16_t global_byte_off =
++ set_add_delay(&add_delay[channel], rank, offsets[off]);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ update_txt(ctrl, channel, rank, byte, TXT_DQDQS_OFF,
++ global_byte_off);
++ }
++ mchbar_write32(SC_WR_ADD_DELAY_ch(channel),
++ add_delay[channel]);
++ }
++ /* Reset FIFOs and DRAM DLL (Micron workaround) */
++ if (!ctrl->lpddr) {
++ io_reset();
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ reset_dram_dll(ctrl, channel, rank);
++ }
++ udelay(1);
++ }
++ for (uint8_t dq_offset = 0; dq_offset < dq_offset_max; dq_offset++) {
++ printk(JWLC_PLOT, "\n% 3d\t% 3d",
++ offsets[off], dq_offsets[dq_offset]);
++ change_1d_margin_multicast(
++ ctrl,
++ WrT,
++ dq_offsets[dq_offset],
++ rank,
++ false,
++ REG_FILE_USE_RANK);
++
++ /*
++ * Re-program the WDB pattern. Change the pattern
++ * for the next test to avoid false pass issues.
++ */
++ program_wdb_pattern(ctrl, invert);
++ invert = !invert;
++ run_io_test(ctrl, chanmask, BASIC_VA, true);
++ done = true;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(JWLC_PLOT, "\t");
++ uint16_t result = get_byte_group_errors(channel);
++ result &= valid_byte_mask;
++
++ /* Skip bytes that have failed or already passed */
++ const uint16_t skip_me = result | byte_pass[channel];
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const bool pass = result & BIT(byte);
++ printk(JWLC_PLOT, pass ? "# " : ". ");
++ if (skip_me & BIT(byte))
++ continue;
++
++ byte_pass[channel] |= BIT(byte);
++ byte_off[channel][byte] = offsets[off];
++ byte_sum[channel] += offsets[off];
++ }
++ if (byte_pass[channel] != valid_byte_mask)
++ done = false;
++ }
++ if (done)
++ break;
++ }
++ if (done)
++ break;
++ }
++ printk(BIOS_DEBUG, "\n\n");
++ if (!done) {
++ printk(BIOS_ERR, "JWLC: Could not find a pass for all bytes\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_ERR, "Channel %u, rank %u fail:", channel, rank);
++ const uint16_t passing_mask = byte_pass[channel];
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ if (BIT(byte) & passing_mask)
++ continue;
++
++ printk(BIOS_ERR, " %u", byte);
++ }
++ printk(BIOS_ERR, "\n");
++ }
++ status = RAMINIT_STATUS_JWRL_FAILURE;
++ break;
++ }
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /* Refine target offset to make sure it works for all bytes */
++ int8_t target_off = DIV_ROUND_CLOSEST(byte_sum[channel], ctrl->lanes);
++ int16_t global_byte_off = 0;
++ uint8_t all_good_loops = 0;
++ bool all_good = false;
++ while (!all_good) {
++ global_byte_off =
++ set_add_delay(&add_delay[channel], rank, target_off);
++ all_good = true;
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ int16_t local_offset;
++ local_offset = byte_off[channel][byte] - target_off;
++ local_offset = local_offset * 128 + global_byte_off;
++ const uint16_t tx_dq = ctrl->tx_dq[channel][rank][byte];
++ if (tx_dq + local_offset >= (512 - 64)) {
++ all_good = false;
++ all_good_loops++;
++ target_off++;
++ break;
++ }
++ /* The lower limit is checked against TxDQS, not TxDQ */
++ const uint16_t txdqs = ctrl->txdqs[channel][rank][byte];
++ if (txdqs + local_offset < 96) {
++ all_good = false;
++ all_good_loops++;
++ target_off--;
++ break;
++ }
++ }
++ /* Avoid an infinite loop */
++ if (all_good_loops > 3)
++ break;
++ }
++ if (!all_good) {
++ printk(BIOS_ERR, "JWLC: Target offset refining failed\n");
++ status = RAMINIT_STATUS_JWRL_FAILURE;
++ break;
++ }
++ printk(BIOS_DEBUG, "C%u.R%u: Offset\tFinalEdge\n", channel, rank);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ int16_t local_offset;
++ local_offset = byte_off[channel][byte] - target_off;
++ local_offset = local_offset * 128 + global_byte_off;
++ ctrl->tx_dq[channel][rank][byte] += local_offset;
++ ctrl->txdqs[channel][rank][byte] += local_offset;
++ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0);
++ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, local_offset,
++ ctrl->txdqs[channel][rank][byte]);
++ }
++ mchbar_write32(SC_WR_ADD_DELAY_ch(channel), add_delay[channel]);
++ if (!ctrl->lpddr) {
++ reset_dram_dll(ctrl, channel, rank);
++ udelay(1);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++
++ /* Restore WDB after test */
++ write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0);
++ program_wdb_lfsr(ctrl, false);
++ mchbar_write32(DDR_DATA_OFFSET_TRAIN, 0);
++
++ /** TODO: Do full JEDEC init instead? **/
++ io_reset();
++ return status;
++}
++
++static enum raminit_status verify_wl_width(const int32_t lwidth)
++{
++ /* Accept only 32 < lwidth < 96; anything else fails write leveling */
++ if (lwidth <= 32) {
++ printk(BIOS_ERR, "WrLevel: Width region (%d) too small\n", lwidth);
++ return RAMINIT_STATUS_JWRL_FAILURE;
++ }
++ /* Since we're calibrating a phase, a too large region is a problem */
++ if (lwidth >= 96) {
++ printk(BIOS_ERR, "WrLevel: Width region (%d) too large\n", lwidth);
++ return RAMINIT_STATUS_JWRL_FAILURE;
++ }
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl)
++{
++ /*
++ * Enabling WL mode causes DQS to toggle for 1024 QCLK (2048 is used for LPDDR).
++ * Wait for this to stop. Round up to nearest microsecond.
++ */
++ const bool wl_long_delay = ctrl->lpddr;
++ const uint32_t dqs_toggle_time = wl_long_delay ? 2048 : 1024;
++ const uint32_t wait_time_us = DIV_ROUND_UP(ctrl->qclkps * dqs_toggle_time, 1000 * 1000);
++
++ const uint16_t wl_start = 192; /* First TxDQS value of the sweep */
++ const uint16_t wl_stop = 192 + 128; /* Sweep ends before this value */
++ const uint16_t wl_step = 2; /* Sweep increment */
++
++ /* Do not use cached MR values */
++ const bool save_restore_mrs = ctrl->restore_mrs;
++ ctrl->restore_mrs = 0;
++
++ /* Propagate delay values (without a write command) */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ /* Propagate delay values from rank 0 to prevent assertion failures in RTL */
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.read_rf_rd = 0;
++ data_control_0.read_rf_wr = 1;
++ data_control_0.read_rf_rank = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_2_reg data_control_2 = {
++ .raw = ctrl->dq_control_2[channel][byte],
++ };
++ data_control_2.force_bias_on = 1;
++ data_control_2.force_rx_on = 0;
++ data_control_2.wl_long_delay = wl_long_delay;
++ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
++ }
++ }
++
++ if (ctrl->lpddr) /* LPDDR is not implemented here */
++ die("%s: Missing LPDDR support\n", __func__);
++
++ if (!ctrl->lpddr) /* Presumably always true here, if die() above does not return */
++ ddr3_program_mr1(ctrl, 0, 1);
++
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!does_rank_exist(ctrl, rank))
++ continue;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /** TODO: Differs for LPDDR **/
++ uint16_t mr1reg = ctrl->mr1[channel][rank];
++ mr1reg &= ~MR1_QOFF_ENABLE;
++ mr1reg |= MR1_WL_ENABLE;
++ if (is_hsw_ult()) {
++ mr1reg &= ~RTTNOM_MASK;
++ mr1reg |= encode_ddr3_rttnom(120);
++ } else if (ctrl->dpc[channel] == 2) {
++ mr1reg &= ~RTTNOM_MASK;
++ mr1reg |= encode_ddr3_rttnom(60);
++ }
++ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg);
++
++ /* Assert ODT for myself */
++ uint8_t odt_matrix = BIT(rank);
++ if (ctrl->dpc[channel] == 2) {
++ /* Assert ODT for non-target DIMM */
++ const uint8_t other_dimm = ((rank + 2) / 2) & 1;
++ odt_matrix |= BIT(2 * other_dimm);
++ }
++
++ union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
++ .raw = 0,
++ };
++ if (ctrl->lpddr) {
++ /* Only one ODT pin for ULT */
++ reut_misc_odt_ctrl.odt_on = 1;
++ reut_misc_odt_ctrl.odt_override = 1;
++ } else if (!is_hsw_ult()) {
++ reut_misc_odt_ctrl.odt_on = odt_matrix;
++ reut_misc_odt_ctrl.odt_override = 0xf;
++ }
++ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw);
++ }
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /*
++ * Enable write leveling mode in DDR and propagate delay
++ * values (without a write command). Stay in WL mode.
++ */
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.wl_training_mode = 1;
++ data_control_0.tx_pi_on = 1;
++ data_control_0.read_rf_rd = 0;
++ data_control_0.read_rf_wr = 1;
++ data_control_0.read_rf_rank = rank;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ }
++ printk(BIOS_DEBUG, "\nRank %u\n", rank);
++ printk(JWRL_PLOT, "Channel\t");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(JWRL_PLOT, "%u", channel);
++ if (channel > 0)
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(JWRL_PLOT, "\t");
++ }
++ printk(JWRL_PLOT, "\nByte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(JWRL_PLOT, "\t%u", byte);
++ }
++ printk(JWRL_PLOT, "\nWlDelay");
++ for (uint16_t wl_delay = wl_start; wl_delay < wl_stop; wl_delay += wl_step) {
++ printk(JWRL_PLOT, "\n %3u:", wl_delay);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ update_txt(ctrl, channel, rank, byte, TXT_TXDQS,
++ wl_delay);
++ }
++ }
++ /* Wait for the first burst to finish */
++ if (wl_delay == wl_start)
++ udelay(wait_time_us);
++
++ io_reset();
++ udelay(wait_time_us);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const uint32_t feedback =
++ get_data_train_feedback(channel, byte);
++ const bool pass = (feedback & 0x1ff) >= 16; /* Low 9 bits must be >= 16 */
++ printk(JWRL_PLOT, "\t%c%u", pass ? '.' : '#', feedback);
++ phase_record_pass(
++ &region_data[channel][byte],
++ pass,
++ wl_delay,
++ wl_start,
++ wl_step);
++ }
++ }
++ }
++ printk(JWRL_PLOT, "\n");
++ printk(BIOS_DEBUG, "\n\tInitSt\tInitEn\tCurrSt\tCurrEn\tLargSt\tLargEn\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "C%u\n", channel);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct phase_train_data *data = &region_data[channel][byte];
++
++ phase_append_initial_to_current(data, wl_start, wl_step);
++ printk(BIOS_DEBUG, " B%u:\t%d\t%d\t%d\t%d\t%d\t%d\n",
++ byte,
++ data->initial.start,
++ data->initial.end,
++ data->current.start,
++ data->current.end,
++ data->largest.start,
++ data->largest.end);
++ }
++ }
++
++ /*
++ * Clean up after test. Very coarsely adjust for
++ * any cycle errors. Program values for TxDQS.
++ */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /* Clear ODT before MRS (JEDEC spec) */
++ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), 0);
++
++ /** TODO: Differs for LPDDR **/
++ const uint16_t mr1reg = ctrl->mr1[channel][rank] | MR1_QOFF_ENABLE;
++ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg);
++
++ printk(BIOS_DEBUG, "\nC%u.R%u: LftEdge Width\n", channel, rank);
++ const bool rank_x16 = ctrl->dimms[channel][rank / 2].data.width == 16;
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct phase_train_data *data = &region_data[channel][byte];
++ const int32_t lwidth = range_width(data->largest);
++ int32_t tx_start = data->largest.start;
++ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, tx_start, lwidth);
++ status = verify_wl_width(lwidth); /* The first bad byte aborts the whole step */
++ if (status) {
++ printk(BIOS_ERR,
++ "WrLevel problems on channel %u, byte %u\n",
++ channel, byte);
++ goto clean_up;
++ }
++
++ /* Align byte pairs if DIMM is x16 */
++ if (rank_x16 && (byte & 1)) {
++ const struct phase_train_data *const ref_data =
++ &region_data[channel][byte - 1];
++
++ if (tx_start > ref_data->largest.start + 64)
++ tx_start -= 128;
++
++ if (tx_start < ref_data->largest.start - 64)
++ tx_start += 128;
++ }
++
++ /* Fix for b4618067 - need to add 1 QCLK to DQS PI */
++ if (is_hsw_ult())
++ tx_start += 64;
++
++ assert(tx_start >= 0);
++ ctrl->txdqs[channel][rank][byte] = tx_start;
++ ctrl->tx_dq[channel][rank][byte] = tx_start + 32; /* TxDQ starts 32 above TxDQS */
++ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0);
++ }
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++
++clean_up: /* Reached on success and on failure: restore cached register values */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ mchbar_write32(DQ_CONTROL_2(channel, byte),
++ ctrl->dq_control_2[channel][byte]);
++ }
++ }
++ if (!ctrl->lpddr)
++ ddr3_program_mr1(ctrl, 0, 0);
++
++ ctrl->restore_mrs = save_restore_mrs;
++
++ if (status)
++ return status;
++
++ /** TODO: If this step fails and dec_wrd is set, clear it and try again **/
++ return train_jedec_write_leveling_cleanup(ctrl);
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 6a31d3a32c..7c0b5a49de 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -121,6 +121,8 @@
+
+ #define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
+
++#define REUT_ch_ERR_MISC_STATUS(ch) _MCMAIN_C(0x40e8, ch)
++
+ #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
+ #define REUT_ch_MISC_ODT_CTRL(ch) _MCMAIN_C(0x4194, ch)
+ #define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch b/resources/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch
new file mode 100644
index 00000000..d15ea5d1
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch
@@ -0,0 +1,570 @@
+From d041b14f3af69db5f4598c84e3f53c9cd572ffb5 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 14:29:05 +0200
+Subject: [PATCH 25/26] haswell NRI: Add final raminit steps
+
+Implement the remaining raminit steps. Although many training steps are
+missing, this is enough to boot on the Asrock B85M Pro4.
+
+Change-Id: I94f3b65f0218d4da4fda4d84592dfd91f77f8f21
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ src/northbridge/intel/haswell/Kconfig | 4 +-
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/activate_mc.c | 388 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_main.c | 5 +-
+ .../haswell/native_raminit/raminit_native.c | 5 +-
+ .../haswell/native_raminit/raminit_native.h | 2 +
+ .../haswell/native_raminit/reg_structs.h | 12 +
+ .../intel/haswell/registers/mchbar.h | 7 +
+ 8 files changed, 416 insertions(+), 8 deletions(-)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/activate_mc.c
+
+diff --git a/src/northbridge/intel/haswell/Kconfig b/src/northbridge/intel/haswell/Kconfig
+index b659bf6d98..61f2a3c64c 100644
+--- a/src/northbridge/intel/haswell/Kconfig
++++ b/src/northbridge/intel/haswell/Kconfig
+@@ -10,12 +10,12 @@ config NORTHBRIDGE_INTEL_HASWELL
+ if NORTHBRIDGE_INTEL_HASWELL
+
+ config USE_NATIVE_RAMINIT
+- bool "[NOT WORKING] Use native raminit"
++ bool "[NOT COMPLETE] Use native raminit"
+ default n
+ select HAVE_DEBUG_RAM_SETUP
+ help
+ Select if you want to use coreboot implementation of raminit rather than
+- MRC.bin. Currently incomplete and does not boot.
++ MRC.bin. Currently incomplete and does not support S3 resume.
+
+ config HASWELL_VBOOT_IN_BOOTBLOCK
+ depends on VBOOT
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 40c2f5e014..d97da72890 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,5 +1,6 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += activate_mc.c
+ romstage-y += change_margin.c
+ romstage-y += configure_mc.c
+ romstage-y += ddr3.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/activate_mc.c b/src/northbridge/intel/haswell/native_raminit/activate_mc.c
+new file mode 100644
+index 0000000000..78a7ad27ef
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/activate_mc.c
+@@ -0,0 +1,388 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static void update_internal_clocks_on(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ bool clocks_on = false;
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const union ddr_data_control_1_reg data_control_1 = {
++ .raw = ctrl->dq_control_1[channel][byte],
++ };
++ const int8_t o_on = data_control_1.odt_delay;
++ const int8_t s_on = data_control_1.sense_amp_delay;
++ const int8_t o_off = data_control_1.odt_duration;
++ const int8_t s_off = data_control_1.sense_amp_duration;
++ if (o_on + o_off >= 7 || s_on + s_off >= 7) {
++ clocks_on = true;
++ break;
++ }
++ }
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.internal_clocks_on = clocks_on;
++ ctrl->dq_control_0[channel] = data_control_0.raw;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ }
++}
++
++/* Switch off unused segments of the SDLL to save power */
++static void update_sdll_length(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ uint8_t max_pi = 0;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ const uint8_t rx_dqs_p = ctrl->rxdqsp[channel][rank][byte];
++ const uint8_t rx_dqs_n = ctrl->rxdqsn[channel][rank][byte];
++ max_pi = MAX(max_pi, MAX(rx_dqs_p, rx_dqs_n));
++ }
++ /* Update SDLL length for power savings */
++ union ddr_data_control_1_reg data_control_1 = {
++ .raw = ctrl->dq_control_1[channel][byte],
++ };
++ /* Calculate which segments to turn off */
++ data_control_1.sdll_segment_disable = (7 - (max_pi >> 3)) & ~1;
++ ctrl->dq_control_1[channel][byte] = data_control_1.raw;
++ mchbar_write32(DQ_CONTROL_1(channel, byte), data_control_1.raw);
++ }
++ }
++}
++
++static void set_rx_clk_stg_num(struct sysinfo *ctrl, const uint8_t channel)
++{
++ const uint8_t rcven_drift = ctrl->lpddr ? DIV_ROUND_UP(tDQSCK_DRIFT, ctrl->qclkps) : 1;
++ uint8_t max_rcven = 0;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ max_rcven = MAX(max_rcven, ctrl->rcven[channel][rank][byte] / 64);
++ }
++ const union ddr_data_control_1_reg ddr_data_control_1 = {
++ .raw = ctrl->dq_control_1[channel][0],
++ };
++ const bool lpddr_long_odt = ddr_data_control_1.lpddr_long_odt_en;
++ const uint8_t rcven_turnoff = max_rcven + 18 + 2 * rcven_drift + lpddr_long_odt;
++ const union ddr_data_control_0_reg ddr_data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_2_reg ddr_data_control_2 = {
++ .raw = ctrl->dq_control_2[channel][byte],
++ };
++ if (ddr_data_control_0.odt_samp_extend_en) {
++ if (ddr_data_control_2.rx_clk_stg_num < rcven_turnoff)
++ ddr_data_control_2.rx_clk_stg_num = rcven_turnoff;
++ } else {
++ const int8_t o_on = ddr_data_control_1.odt_delay;
++ const int8_t o_off = ddr_data_control_1.odt_duration;
++ ddr_data_control_2.rx_clk_stg_num = MAX(17, o_on + o_off + 14);
++ }
++ ctrl->dq_control_2[channel][byte] = ddr_data_control_2.raw;
++ mchbar_write32(DQ_CONTROL_2(channel, byte), ddr_data_control_2.raw);
++ }
++}
++
++#define SELF_REFRESH_IDLE_COUNT 0x200
++
++static void enter_sr(void)
++{
++ mchbar_write32(PM_SREF_CONFIG, SELF_REFRESH_IDLE_COUNT | BIT(16));
++ udelay(1);
++}
++
++enum power_down_mode {
++ PDM_NO_PD = 0,
++ PDM_APD = 1,
++ PDM_PPD = 2,
++ PDM_PPD_DLL_OFF = 6,
++};
++
++static void power_down_config(struct sysinfo *ctrl)
++{
++ const enum power_down_mode pd_mode = ctrl->lpddr ? PDM_PPD : PDM_PPD_DLL_OFF;
++ mchbar_write32(PM_PDWN_CONFIG, pd_mode << 12 | 0x40);
++}
++
++static void train_power_modes_post(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ /* Adjust tCPDED and tPRPDEN */
++ if (ctrl->mem_clock_mhz >= 933)
++ ctrl->tc_bankrank_d[channel].tCPDED = 2;
++
++ if (ctrl->mem_clock_mhz >= 1066)
++ ctrl->tc_bankrank_d[channel].tPRPDEN = 2;
++
++ mchbar_write32(TC_BANK_RANK_D_ch(channel), ctrl->tc_bankrank_d[channel].raw);
++ }
++ power_down_config(ctrl);
++ mchbar_write32(MCDECS_CBIT, BIT(30)); /* dis_msg_clk_gate */
++}
++
++static uint8_t compute_burst_end_odt_delay(const struct sysinfo *const ctrl)
++{
++ /* Must be disabled for LPDDR */
++ if (ctrl->lpddr)
++ return 0;
++
++ const uint8_t beod = MIN(7, DIV_ROUND_CLOSEST(14300 * 20 / 100, ctrl->qclkps));
++ if (beod < 3)
++ return 0;
++
++ if (beod < 4)
++ return 4;
++
++ return beod;
++}
++
++static void program_burst_end_odt_delay(struct sysinfo *ctrl)
++{
++ /* Program burst_end_odt_delay - it should be zero during training steps */
++ const uint8_t beod = compute_burst_end_odt_delay(ctrl);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_1_reg ddr_data_control_1 = {
++ .raw = ctrl->dq_control_1[channel][byte],
++ };
++ ddr_data_control_1.burst_end_odt_delay = beod;
++ ctrl->dq_control_1[channel][byte] = ddr_data_control_1.raw;
++ mchbar_write32(DQ_CONTROL_1(channel, byte), ddr_data_control_1.raw);
++ }
++ }
++}
++
++/*
++ * Return a 16-bit scrambler seed for the given channel. Try RDRAND first and
++ * fall back to hardcoded per-channel values if RDRAND never yields a usable one.
++ */
++static uint16_t get_random_number(const uint8_t channel)
++{
++ /* The RDRAND instruction is only available 100k cycles after reset */
++ for (size_t i = 0; i < 100000; i++) {
++ uint32_t status;
++ uint32_t random;
++ /** TODO: Clean up asm (the .byte sequence encodes rdrand %eax) **/
++ __asm__ __volatile__(
++ "\n\t .byte 0x0F, 0xC7, 0xF0"
++ "\n\t movl %%eax, %0"
++ "\n\t pushf"
++ "\n\t pop %%eax"
++ "\n\t movl %%eax, %1"
++ : "=m"(random),
++ "=m"(status)
++ : /* No inputs */
++ : "eax", "cc");
++
++ /* CF (bit 0) signals success; the returned low 16 bits must be non-zero */
++ if (status & 1 && (uint16_t)random)
++ return random;
++ }
++
++ /* https://xkcd.com/221 */
++ if (channel)
++ return 0x28f4;
++ else
++ return 0x893e;
++}
++
++/* Work around "error: 'typeof' applied to a bit-field" */
++static inline uint32_t max(const uint32_t a, const uint32_t b)
++{
++ return MAX(a, b);
++}
++
++enum raminit_status activate_mc(struct sysinfo *ctrl)
++{
++ const bool enable_scrambling = true;
++ const bool enable_cmd_tristate = true;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ if (enable_scrambling && ctrl->stepping < STEPPING_C0) {
++ /* Make sure tRDRD_(sr, dr, dd) are at least 6 for scrambler W/A */
++ union tc_bank_rank_a_reg tc_bank_rank_a = {
++ .raw = mchbar_read32(TC_BANK_RANK_A_ch(channel)),
++ };
++ tc_bank_rank_a.tRDRD_sr = max(tc_bank_rank_a.tRDRD_sr, 6);
++ tc_bank_rank_a.tRDRD_dr = max(tc_bank_rank_a.tRDRD_dr, 6);
++ tc_bank_rank_a.tRDRD_dd = max(tc_bank_rank_a.tRDRD_dd, 6);
++ mchbar_write32(TC_BANK_RANK_A_ch(channel), tc_bank_rank_a.raw);
++ }
++ if (enable_scrambling) {
++ const union ddr_scramble_reg ddr_scramble = {
++ .scram_key = get_random_number(channel),
++ .scram_en = 1,
++ };
++ mchbar_write32(DDR_SCRAMBLE_ch(channel), ddr_scramble.raw);
++ }
++ if (ctrl->tCMD == 1) {
++ /* If we are in 1N mode, enable and set command rate limit to 3 */
++ union mcmain_command_rate_limit_reg cmd_rate_limit = {
++ .raw = mchbar_read32(COMMAND_RATE_LIMIT_ch(channel)),
++ };
++ cmd_rate_limit.enable_cmd_limit = 1;
++ cmd_rate_limit.cmd_rate_limit = 3;
++ mchbar_write32(COMMAND_RATE_LIMIT_ch(channel), cmd_rate_limit.raw);
++ }
++ if (enable_cmd_tristate) {
++ /* Enable command tri-state at the end of training */
++ union tc_bank_rank_a_reg tc_bank_rank_a = {
++ .raw = mchbar_read32(TC_BANK_RANK_A_ch(channel)),
++ };
++ tc_bank_rank_a.cmd_3st_dis = 0;
++ mchbar_write32(TC_BANK_RANK_A_ch(channel), tc_bank_rank_a.raw);
++ }
++ /* Set MC to normal mode and clean the ODT and CKE */
++ mchbar_write32(REUT_ch_SEQ_CFG(channel), REUT_MODE_NOP << 12);
++ /* Set again the rank occupancy */
++ mchbar_write8(MC_INIT_STATE_ch(channel), ctrl->rankmap[channel]);
++ if (ctrl->is_ecc) {
++ /* Enable ECC I/O and logic */
++ union mad_dimm_reg mad_dimm = {
++ .raw = mchbar_read32(MAD_DIMM(channel)),
++ };
++ mad_dimm.ecc_mode = 3;
++ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
++ }
++ }
++
++ if (!is_hsw_ult())
++ update_internal_clocks_on(ctrl);
++
++ update_sdll_length(ctrl);
++
++ program_burst_end_odt_delay(ctrl);
++
++ if (is_hsw_ult()) {
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ set_rx_clk_stg_num(ctrl, channel);
++ }
++ /** TODO: Program DDRPL_CR_DDR_TX_DELAY if Memory Trace is enabled **/
++ }
++
++ /* Enable periodic COMP */
++ mchbar_write32(M_COMP, (union pcu_comp_reg) {
++ .comp_interval = COMP_INT,
++ }.raw);
++
++ /* Enable the power mode before PCU starts working */
++ train_power_modes_post(ctrl);
++
++ /* Set idle timer and self refresh enable bits */
++ enter_sr();
++
++ /** FIXME: Do not hardcode power weights and RAPL settings **/
++ mchbar_write32(0x5888, 0x00000d0d);
++ mchbar_write32(0x5884, 0x00000004); /* 58.2 pJ */
++
++ mchbar_write32(0x58e0, 0);
++ mchbar_write32(0x58e4, 0);
++
++ mchbar_write32(0x5890, 0xffff);
++ mchbar_write32(0x5894, 0xffff);
++ mchbar_write32(0x5898, 0xffff);
++ mchbar_write32(0x589c, 0xffff);
++ mchbar_write32(0x58d0, 0xffff);
++ mchbar_write32(0x58d4, 0xffff);
++ mchbar_write32(0x58d8, 0xffff);
++ mchbar_write32(0x58dc, 0xffff);
++
++ /* Overwrite thermal parameters */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ mchbar_write32(_MCMAIN_C(0x42ec, channel), 0x0000000f);
++ mchbar_write32(_MCMAIN_C(0x42f0, channel), 0x00000009);
++ mchbar_write32(_MCMAIN_C(0x42f4, channel), 0x00000093);
++ mchbar_write32(_MCMAIN_C(0x42f8, channel), 0x00000087);
++ mchbar_write32(_MCMAIN_C(0x42fc, channel), 0x000000de);
++
++ /** TODO: Differs for LPDDR **/
++ mchbar_write32(PM_THRT_CKE_MIN_ch(channel), 0x30);
++ }
++ mchbar_write32(PCU_DDR_PTM_CTL, 0x40);
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++static void mc_lockdown(void)
++{
++ /* Lock memory controller registers */
++ mchbar_write32(MC_LOCK, 0x8f);
++
++ /* MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK is set when programming the memory map */
++
++ /* Lock memory map registers */
++ pci_or_config16(HOST_BRIDGE, GGC, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, DPR, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, MESEG_LIMIT, 1 << 10);
++ pci_or_config32(HOST_BRIDGE, REMAPBASE, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, REMAPLIMIT, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, TOM, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, TOUUD, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, BDSM, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, BGSM, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, TOLUD, 1 << 0);
++}
++
++enum raminit_status raminit_done(struct sysinfo *ctrl)
++{
++ union mc_init_state_g_reg mc_init_state_g = {
++ .raw = mchbar_read32(MC_INIT_STATE_G),
++ };
++ mc_init_state_g.refresh_enable = 1;
++ mc_init_state_g.pu_mrc_done = 1;
++ mc_init_state_g.mrc_done = 1;
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
++
++ /* Lock the memory controller to enable normal operation */
++ mc_lockdown();
++
++ /* Poll for mc_init_done_ack to make sure memory initialization is complete */
++ printk(BIOS_DEBUG, "Waiting for mc_init_done acknowledgement... ");
++
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 2000);
++ do {
++ mc_init_state_g.raw = mchbar_read32(MC_INIT_STATE_G);
++
++ /* DRAM will NOT work without the acknowledgement. There is no hope. */
++ if (stopwatch_expired(&timer))
++ die("\nTimed out waiting for mc_init_done acknowledgement\n");
++
++ } while (mc_init_state_g.mc_init_done_ack == 0);
++ printk(BIOS_DEBUG, "DONE!\n");
++
++ /* Provide some data for the graphics driver. Yes, it's hardcoded. */
++ mchbar_write32(SSKPD + 0, 0x05a2404f);
++ mchbar_write32(SSKPD + 4, 0x140000a0);
++ return RAMINIT_STATUS_SUCCESS;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 1ff23be615..3a65fb01fb 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -63,6 +63,8 @@ static const struct task_entry cold_boot[] = {
+ { train_receive_enable, true, "RCVET", },
+ { train_read_mpr, true, "RDMPRT", },
+ { train_jedec_write_leveling, true, "JWRL", },
++ { activate_mc, true, "ACTIVATE", },
++ { raminit_done, true, "RAMINITEND", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+@@ -143,7 +145,4 @@ void raminit_main(const enum raminit_boot_mode bootmode)
+
+ if (status != RAMINIT_STATUS_SUCCESS)
+ die("Memory initialization was met with utmost failure and misery\n");
+-
+- /** TODO: Implement the required magic **/
+- die("NATIVE RAMINIT: More Magic (tm) required.\n");
+ }
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index bd9bc8e692..1ea729b23d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -200,8 +200,6 @@ void perform_raminit(const int s3resume)
+ else
+ me_status = ME_INIT_STATUS_SUCCESS;
+
+- /** TODO: Remove this once raminit is implemented **/
+- me_status = ME_INIT_STATUS_ERROR;
+ intel_early_me_init_done(me_status);
+ }
+
+@@ -217,7 +215,8 @@ void perform_raminit(const int s3resume)
+ }
+
+ /* Save training data on non-S3 resumes */
+- if (!s3resume)
++ /** TODO: Enable this once training data is populated **/
++ if (0 && !s3resume)
+ save_mrc_data(&md);
+
+ /** TODO: setup_sdram_meminfo **/
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 666b233c45..98e39cb76e 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -449,6 +449,8 @@ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+ enum raminit_status train_receive_enable(struct sysinfo *ctrl);
+ enum raminit_status train_read_mpr(struct sysinfo *ctrl);
+ enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
++enum raminit_status activate_mc(struct sysinfo *ctrl);
++enum raminit_status raminit_done(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index a0e36ed082..0d9aaa1f7c 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -294,6 +294,18 @@ union ddr_cke_ctl_controls_reg {
+ uint32_t raw;
+ };
+
++union ddr_scramble_reg {
++ struct __packed {
++ uint32_t scram_en : 1; // Bits 0:0
++ uint32_t scram_key : 16; // Bits 16:1
++ uint32_t clk_gate_ab : 2; // Bits 18:17
++ uint32_t clk_gate_c : 2; // Bits 20:19
++ uint32_t en_dbi_ab : 1; // Bits 21:21
++ uint32_t : 10; // Bits 31:22
++ };
++ uint32_t raw;
++};
++
+ union ddr_scram_misc_control_reg {
+ struct __packed {
+ uint32_t wl_wake_cycles : 2; // Bits 1:0
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 7c0b5a49de..49a215aa71 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -20,6 +20,7 @@
+
+ #define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
+
++#define DQ_CONTROL_1(ch, byte) _DDRIO_C_R_B(0x0060, ch, 0, byte)
+ #define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
+ #define DDR_DATA_OFFSET_TRAIN_ch_b(ch, byte) _DDRIO_C_R_B(0x0070, ch, 0, byte)
+ #define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
+@@ -147,6 +148,8 @@
+ #define QCLK_ch_LDAT_SDAT(ch) _MCMAIN_C(0x42d4, ch)
+ #define QCLK_ch_LDAT_DATA_IN_x(ch, x) _MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */
+
++#define PM_THRT_CKE_MIN_ch(ch) _MCMAIN_C(0x4328, ch)
++
+ #define REUT_GLOBAL_CTL 0x4800
+ #define REUT_GLOBAL_ERR 0x4804
+
+@@ -175,6 +178,8 @@
+
+ #define MCSCHEDS_DFT_MISC 0x4c30
+
++#define PM_PDWN_CONFIG 0x4cb0
++
+ #define REUT_ERR_DATA_STATUS 0x4ce0
+
+ #define REUT_MISC_CKE_CTRL 0x4d90
+@@ -186,8 +191,10 @@
+ #define MAD_CHNL 0x5000 /* Address Decoder Channel Configuration */
+ #define MAD_DIMM(ch) (0x5004 + (ch) * 4)
+ #define MAD_ZR 0x5014
++#define MCDECS_CBIT 0x501c
+ #define MC_INIT_STATE_G 0x5030
+ #define MRC_REVISION 0x5034 /* MRC Revision */
++#define PM_SREF_CONFIG 0x5060
+
+ #define RCOMP_TIMER 0x5084
+
+--
+2.39.2
+
diff --git a/resources/coreboot/haswell/patches/0026-specifically-use-python3-in-scripts.patch b/resources/coreboot/haswell/patches/0026-specifically-use-python3-in-scripts.patch
new file mode 100644
index 00000000..8f957cb9
--- /dev/null
+++ b/resources/coreboot/haswell/patches/0026-specifically-use-python3-in-scripts.patch
@@ -0,0 +1,36 @@
+From 9a65c1e4ca8a0f0089fd8e8ee9c8690aefce2133 Mon Sep 17 00:00:00 2001
+From: Leah Rowe <leah@libreboot.org>
+Date: Sun, 13 Mar 2022 18:04:55 +0000
+Subject: [PATCH 26/26] specifically use python3, in scripts
+
+---
+ src/drivers/intel/fsp2_0/Makefile.inc | 2 +-
+ util/spdtool/spdtool.py | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/drivers/intel/fsp2_0/Makefile.inc b/src/drivers/intel/fsp2_0/Makefile.inc
+index f5641ac182..d807320f29 100644
+--- a/src/drivers/intel/fsp2_0/Makefile.inc
++++ b/src/drivers/intel/fsp2_0/Makefile.inc
+@@ -87,7 +87,7 @@ endif
+
+ ifeq ($(CONFIG_FSP_FULL_FD),y)
+ $(obj)/Fsp_M.fd: $(call strip_quotes,$(CONFIG_FSP_FD_PATH)) $(DOTCONFIG)
+- python 3rdparty/fsp/Tools/SplitFspBin.py split -f $(CONFIG_FSP_FD_PATH) -o "$(obj)" -n "Fsp.fd"
++ python3 3rdparty/fsp/Tools/SplitFspBin.py split -f $(CONFIG_FSP_FD_PATH) -o "$(obj)" -n "Fsp.fd"
+
+ $(obj)/Fsp_S.fd: $(call strip_quotes,$(CONFIG_FSP_FD_PATH)) $(obj)/Fsp_M.fd
+ true
+diff --git a/util/spdtool/spdtool.py b/util/spdtool/spdtool.py
+index 89976eac59..2cd7027377 100644
+--- a/util/spdtool/spdtool.py
++++ b/util/spdtool/spdtool.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/env python3
+ # spdtool - Tool for partial deblobbing of UEFI firmware images
+ # SPDX-License-Identifier: GPL-3.0-or-later
+ #
+--
+2.39.2
+