summaryrefslogtreecommitdiff
path: root/config/coreboot/haswell/patches
diff options
context:
space:
mode:
Diffstat (limited to 'config/coreboot/haswell/patches')
-rw-r--r--config/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch54
-rw-r--r--config/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch143
-rw-r--r--config/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch615
-rw-r--r--config/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch148
-rw-r--r--config/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch783
-rw-r--r--config/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch128
-rw-r--r--config/coreboot/haswell/patches/0007-sb-intel-lynxpoint-Add-native-PCH-init.patch785
-rw-r--r--config/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch407
-rw-r--r--config/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch57
-rw-r--r--config/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch344
-rw-r--r--config/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch346
-rw-r--r--config/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch249
-rw-r--r--config/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch1593
-rw-r--r--config/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch541
-rw-r--r--config/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch263
-rw-r--r--config/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch1038
-rw-r--r--config/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch384
-rw-r--r--config/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch1128
-rw-r--r--config/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch222
-rw-r--r--config/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch294
-rw-r--r--config/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch708
-rw-r--r--config/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch272
-rw-r--r--config/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch331
-rw-r--r--config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch688
-rw-r--r--config/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch570
-rw-r--r--config/coreboot/haswell/patches/0026-Remove-warning-for-coreboot-images-built-without-a-p.patch38
-rw-r--r--config/coreboot/haswell/patches/0027-coreboot-haswell-fix-acpica-downloads.patch30
27 files changed, 12159 insertions, 0 deletions
diff --git a/config/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch b/config/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch
new file mode 100644
index 00000000..96e4c14d
--- /dev/null
+++ b/config/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch
@@ -0,0 +1,54 @@
+From dd58f5e9108bc596c93071705d2b53233d13ade6 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 20:36:10 +0200
+Subject: [PATCH 01/26] commonlib/clamp.h: Add more clamping functions
+
+Add more clamping functions that work with different types.
+
+Change-Id: I14cf335d5a54f769f8fd9184450957e876affd6b
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ src/commonlib/include/commonlib/clamp.h | 26 +++++++++++++++++--------
+ 1 file changed, 18 insertions(+), 8 deletions(-)
+
+diff --git a/src/commonlib/include/commonlib/clamp.h b/src/commonlib/include/commonlib/clamp.h
+index e01a107ed4..526185195c 100644
+--- a/src/commonlib/include/commonlib/clamp.h
++++ b/src/commonlib/include/commonlib/clamp.h
+@@ -8,15 +8,25 @@
+ /*
+ * Clamp a value, so that it is between a lower and an upper bound.
+ */
+-static inline u32 clamp_u32(const u32 min, const u32 val, const u32 max)
+-{
+- if (val > max)
+- return max;
++#define __MAKE_CLAMP_FUNC(type) \
++ static inline type clamp_##type(const type min, const type val, const type max) \
++ { \
++ if (val > max) \
++ return max; \
++ if (val < min) \
++ return min; \
++ return val; \
++ } \
+
+- if (val < min)
+- return min;
++__MAKE_CLAMP_FUNC(s8) /* clamp_s8 */
++__MAKE_CLAMP_FUNC(u8) /* clamp_u8 */
++__MAKE_CLAMP_FUNC(s16) /* clamp_s16 */
++__MAKE_CLAMP_FUNC(u16) /* clamp_u16 */
++__MAKE_CLAMP_FUNC(s32) /* clamp_s32 */
++__MAKE_CLAMP_FUNC(u32) /* clamp_u32 */
++__MAKE_CLAMP_FUNC(s64) /* clamp_s64 */
++__MAKE_CLAMP_FUNC(u64) /* clamp_u64 */
+
+- return val;
+-}
++#undef __MAKE_CLAMP_FUNC
+
+ #endif /* COMMONLIB_CLAMP_H */
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch b/config/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch
new file mode 100644
index 00000000..35d5c89e
--- /dev/null
+++ b/config/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch
@@ -0,0 +1,143 @@
+From c07391821c32cafea950574b85468f5b3284b6df Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 21:12:14 +0200
+Subject: [PATCH 02/26] nb/intel/haswell: Introduce option to not use MRC.bin
+
+Introduce the `USE_NATIVE_RAMINIT` Kconfig option, which should allow
+booting coreboot on Haswell mainboards without the need of the closed
+source MRC.bin. For now, this option does not work at all; the needed
+magic will be implemented in subsequent commits. Add a config file to
+make sure the newly-introduced option gets build-tested.
+
+Change-Id: I46c77586f9b5771624082e07c60c205e578edd8e
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ configs/config.asrock_b85m_pro4.native_raminit | 5 +++++
+ src/northbridge/intel/haswell/Kconfig | 13 +++++++++++++
+ src/northbridge/intel/haswell/Makefile.inc | 7 ++++++-
+ .../intel/haswell/native_raminit/Makefile.inc | 3 +++
+ .../intel/haswell/native_raminit/raminit_native.c | 15 +++++++++++++++
+ 5 files changed, 42 insertions(+), 1 deletion(-)
+ create mode 100644 configs/config.asrock_b85m_pro4.native_raminit
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/Makefile.inc
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/raminit_native.c
+
+diff --git a/configs/config.asrock_b85m_pro4.native_raminit b/configs/config.asrock_b85m_pro4.native_raminit
+new file mode 100644
+index 0000000000..2de538926f
+--- /dev/null
++++ b/configs/config.asrock_b85m_pro4.native_raminit
+@@ -0,0 +1,5 @@
++# Configuration used to build-test native raminit
++CONFIG_VENDOR_ASROCK=y
++CONFIG_BOARD_ASROCK_B85M_PRO4=y
++CONFIG_USE_NATIVE_RAMINIT=y
++CONFIG_DEBUG_RAM_SETUP=y
+diff --git a/src/northbridge/intel/haswell/Kconfig b/src/northbridge/intel/haswell/Kconfig
+index 50acb09a91..b659bf6d98 100644
+--- a/src/northbridge/intel/haswell/Kconfig
++++ b/src/northbridge/intel/haswell/Kconfig
+@@ -9,6 +9,14 @@ config NORTHBRIDGE_INTEL_HASWELL
+
+ if NORTHBRIDGE_INTEL_HASWELL
+
++config USE_NATIVE_RAMINIT
++ bool "[NOT WORKING] Use native raminit"
++ default n
++ select HAVE_DEBUG_RAM_SETUP
++ help
++ Select if you want to use coreboot implementation of raminit rather than
++ MRC.bin. Currently incomplete and does not boot.
++
+ config HASWELL_VBOOT_IN_BOOTBLOCK
+ depends on VBOOT
+ bool "Start verstage in bootblock"
+@@ -45,6 +53,7 @@ config DCACHE_RAM_BASE
+
+ config DCACHE_RAM_SIZE
+ hex
++ default 0x40000 if USE_NATIVE_RAMINIT
+ default 0x10000
+ help
+ The size of the cache-as-ram region required during bootblock
+@@ -53,12 +62,14 @@ config DCACHE_RAM_SIZE
+
+ config DCACHE_RAM_MRC_VAR_SIZE
+ hex
++ default 0x0 if USE_NATIVE_RAMINIT
+ default 0x30000
+ help
+ The amount of cache-as-ram region required by the reference code.
+
+ config DCACHE_BSP_STACK_SIZE
+ hex
++ default 0x20000 if USE_NATIVE_RAMINIT
+ default 0x2000
+ help
+ The amount of anticipated stack usage in CAR by bootblock and
+@@ -66,6 +77,7 @@ config DCACHE_BSP_STACK_SIZE
+
+ config HAVE_MRC
+ bool "Add a System Agent binary"
++ depends on !USE_NATIVE_RAMINIT
+ help
+ Select this option to add a System Agent binary to
+ the resulting coreboot image.
+@@ -82,6 +94,7 @@ config MRC_FILE
+
+ config HASWELL_HIDE_PEG_FROM_MRC
+ bool "Hide PEG devices from MRC to work around hardcoded MRC behavior"
++ depends on !USE_NATIVE_RAMINIT
+ default y
+ help
+ If set, hides all PEG devices from MRC. This allows the iGPU
+diff --git a/src/northbridge/intel/haswell/Makefile.inc b/src/northbridge/intel/haswell/Makefile.inc
+index 2d1532be05..329f1f7ffe 100644
+--- a/src/northbridge/intel/haswell/Makefile.inc
++++ b/src/northbridge/intel/haswell/Makefile.inc
+@@ -19,6 +19,11 @@ romstage-y += report_platform.c
+
+ postcar-y += memmap.c
+
+-subdirs-y += haswell_mrc
++ifeq ($(CONFIG_USE_NATIVE_RAMINIT),y)
++subdirs-y += native_raminit
++
++else
++subdirs-y += haswell_mrc
++endif
+
+ endif
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+new file mode 100644
+index 0000000000..8cfb4fb33e
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -0,0 +1,3 @@
++## SPDX-License-Identifier: GPL-2.0-or-later
++
++romstage-y += raminit_native.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+new file mode 100644
+index 0000000000..1aafdf8659
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -0,0 +1,15 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <northbridge/intel/haswell/raminit.h>
++
++void perform_raminit(const int s3resume)
++{
++ /*
++ * See, this function's name is a lie. There are more things to
++ * do that memory initialisation, but they are relatively easy.
++ */
++
++ /** TODO: Implement the required magic **/
++ die("NATIVE RAMINIT: More Magic (tm) required.\n");
++}
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch b/config/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch
new file mode 100644
index 00000000..4e70407c
--- /dev/null
+++ b/config/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch
@@ -0,0 +1,615 @@
+From 6ec71c6df97eded010e96c4ea2bd37cc6a13849d Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 21:56:48 +0200
+Subject: [PATCH 03/26] haswell/lynxpoint: Add native DMI init
+
+Implement native DMI init for Haswell and Lynx Point. This is only
+needed on non-ULT platforms, and only when MRC.bin is not used.
+
+TEST=Verify DMI initialises correctly on Asrock B85M Pro4.
+
+Change-Id: I5fb1a2adc4ffbf0ebbf0d2d3a444055c53765faa
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ src/northbridge/intel/haswell/Makefile.inc | 1 +
+ src/northbridge/intel/haswell/early_dmi.c | 96 ++++++++++++
+ src/northbridge/intel/haswell/early_pcie.c | 121 ++++++++++++++
+ src/northbridge/intel/haswell/haswell.h | 3 +
+ .../haswell/native_raminit/raminit_native.c | 15 ++
+ src/northbridge/intel/haswell/vcu_mailbox.c | 147 ++++++++++++++++++
+ src/northbridge/intel/haswell/vcu_mailbox.h | 16 ++
+ src/southbridge/intel/lynxpoint/Makefile.inc | 2 +
+ .../intel/lynxpoint/early_pch_native.c | 52 +++++++
+ src/southbridge/intel/lynxpoint/pch.h | 20 ++-
+ 10 files changed, 472 insertions(+), 1 deletion(-)
+ create mode 100644 src/northbridge/intel/haswell/early_dmi.c
+ create mode 100644 src/northbridge/intel/haswell/early_pcie.c
+ create mode 100644 src/northbridge/intel/haswell/vcu_mailbox.c
+ create mode 100644 src/northbridge/intel/haswell/vcu_mailbox.h
+ create mode 100644 src/southbridge/intel/lynxpoint/early_pch_native.c
+
+diff --git a/src/northbridge/intel/haswell/Makefile.inc b/src/northbridge/intel/haswell/Makefile.inc
+index 329f1f7ffe..df0b097296 100644
+--- a/src/northbridge/intel/haswell/Makefile.inc
++++ b/src/northbridge/intel/haswell/Makefile.inc
+@@ -20,6 +20,7 @@ romstage-y += report_platform.c
+ postcar-y += memmap.c
+
+ ifeq ($(CONFIG_USE_NATIVE_RAMINIT),y)
++romstage-y += early_dmi.c early_pcie.c vcu_mailbox.c
+ subdirs-y += native_raminit
+
+ else
+diff --git a/src/northbridge/intel/haswell/early_dmi.c b/src/northbridge/intel/haswell/early_dmi.c
+new file mode 100644
+index 0000000000..9941242fd5
+--- /dev/null
++++ b/src/northbridge/intel/haswell/early_dmi.c
+@@ -0,0 +1,96 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <types.h>
++
++static void dmi_print_link_status(int loglevel)
++{
++ const uint16_t dmilsts = dmibar_read16(DMILSTS);
++ printk(loglevel, "DMI: Running at Gen%u x%u\n", dmilsts & 0xf, dmilsts >> 4 & 0x1f);
++}
++
++#define RETRAIN (1 << 5)
++
++#define LTRN (1 << 11)
++
++static void dmi_setup_physical_layer(void)
++{
++ /* Program DMI AFE settings, which are needed for DMI to work */
++ peg_dmi_recipe(false, 0);
++
++ /* Additional DMI programming steps */
++ dmibar_setbits32(0x258, 1 << 29);
++ dmibar_clrsetbits32(0x208, 0x7ff, 0x6b5);
++ dmibar_clrsetbits32(0x22c, 0xffff, 0x2020);
++
++ /* Write SA reference code version */
++ dmibar_write32(0x71c, 0x0000000f);
++ dmibar_write32(0x720, 0x01060200);
++
++ /* We also have to bring up the PCH side of the DMI link */
++ pch_dmi_setup_physical_layer();
++
++ /* Write-once settings */
++ dmibar_clrsetbits32(DMILCAP, 0x3f00f, 2 << 0);
++
++ printk(BIOS_DEBUG, "Retraining DMI at Gen2 speeds...\n");
++ dmi_print_link_status(BIOS_DEBUG);
++
++ /* Retrain link */
++ dmibar_setbits16(DMILCTL, RETRAIN);
++ do {} while (dmibar_read16(DMILSTS) & LTRN);
++ dmi_print_link_status(BIOS_DEBUG);
++
++ /* Retrain link again for DMI Gen2 speeds */
++ dmibar_setbits16(DMILCTL, RETRAIN);
++ do {} while (dmibar_read16(DMILSTS) & LTRN);
++ dmi_print_link_status(BIOS_INFO);
++}
++
++#define VC_ACTIVE (1U << 31)
++
++#define VCNEGPND (1 << 1)
++
++#define DMI_VC_CFG(vcid, tcmap) (VC_ACTIVE | ((vcid) << 24) | (tcmap))
++
++static void dmi_tc_vc_mapping(void)
++{
++ printk(BIOS_DEBUG, "Programming SA DMI VC/TC mappings...\n");
++
++ if (CONFIG(INTEL_LYNXPOINT_LP))
++ dmibar_setbits8(0xa78, 1 << 1);
++
++ /* Each TC is mapped to one and only one VC */
++ const u32 vc0 = DMI_VC_CFG(0, (1 << 6) | (1 << 5) | (1 << 4) | (1 << 3) | (1 << 0));
++ const u32 vc1 = DMI_VC_CFG(1, (1 << 1));
++ const u32 vcp = DMI_VC_CFG(2, (1 << 2));
++ const u32 vcm = DMI_VC_CFG(7, (1 << 7));
++ dmibar_write32(DMIVC0RCTL, vc0);
++ dmibar_write32(DMIVC1RCTL, vc1);
++ dmibar_write32(DMIVCPRCTL, vcp);
++ dmibar_write32(DMIVCMRCTL, vcm);
++
++ /* Set Extended VC Count (EVCC) to 1 if VC1 is active */
++ dmibar_clrsetbits8(DMIPVCCAP1, 7, !!(vc1 & VC_ACTIVE));
++
++ /*
++ * We also have to program the PCH side of the DMI link. Since both ends
++ * must use the same Virtual Channel settings, we pass them as arguments.
++ */
++ pch_dmi_tc_vc_mapping(vc0, vc1, vcp, vcm);
++
++ printk(BIOS_DEBUG, "Waiting for SA DMI VC negotiation... ");
++ do {} while (dmibar_read16(DMIVC0RSTS) & VCNEGPND);
++ do {} while (dmibar_read16(DMIVC1RSTS) & VCNEGPND);
++ do {} while (dmibar_read16(DMIVCPRSTS) & VCNEGPND);
++ do {} while (dmibar_read16(DMIVCMRSTS) & VCNEGPND);
++ printk(BIOS_DEBUG, "done!\n");
++}
++
++void dmi_early_init(void)
++{
++ dmi_setup_physical_layer();
++ dmi_tc_vc_mapping();
++}
+diff --git a/src/northbridge/intel/haswell/early_pcie.c b/src/northbridge/intel/haswell/early_pcie.c
+new file mode 100644
+index 0000000000..d3940e3fac
+--- /dev/null
++++ b/src/northbridge/intel/haswell/early_pcie.c
+@@ -0,0 +1,121 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <device/pci_def.h>
++#include <device/pci_mmio_cfg.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/vcu_mailbox.h>
++#include <types.h>
++
++#define PEG_DEV(func) PCI_DEV(0, 1, func)
++
++#define MAX_PEG_FUNC 3
++
++static void peg_dmi_unset_and_set_mask_pcicfg(
++ volatile union pci_bank *const bank,
++ const uint32_t offset,
++ const uint32_t unset_mask,
++ const uint32_t set_mask,
++ const uint32_t shift,
++ const bool valid)
++{
++ if (!valid)
++ return;
++
++ volatile uint32_t *const addr = &bank->reg32[offset / sizeof(uint32_t)];
++ clrsetbits32(addr, unset_mask << shift, set_mask << shift);
++}
++
++static void peg_dmi_unset_and_set_mask_common(
++ const bool is_peg,
++ const uint32_t offset,
++ const uint32_t unset,
++ const uint32_t set,
++ const uint32_t shift,
++ const bool valid)
++{
++ const uint32_t unset_mask = unset << shift;
++ const uint32_t set_mask = set << shift;
++ if (is_peg) {
++ for (uint8_t i = 0; i < MAX_PEG_FUNC; i++)
++ pci_update_config32(PEG_DEV(i), offset, ~unset_mask, set_mask);
++ } else {
++ dmibar_clrsetbits32(offset, unset_mask, set_mask);
++ }
++}
++
++static void peg_dmi_unset_and_set_mask_vcu_mmio(
++ const uint32_t addr,
++ const uint32_t unset_mask,
++ const uint32_t set_mask,
++ const uint32_t shift,
++ const bool valid)
++{
++ if (!valid)
++ return;
++
++ vcu_update_mmio(addr, ~(unset_mask << shift), set_mask << shift);
++}
++
++#define BUNDLE_STEP 0x20
++
++static void *const dmibar = (void *)(uintptr_t)CONFIG_FIXED_DMIBAR_MMIO_BASE;
++
++void peg_dmi_recipe(const bool is_peg, const pci_devfn_t dev)
++{
++ const bool always = true;
++ const bool is_dmi = !is_peg;
++
++ /* Treat DMIBAR and PEG devices the same way */
++ volatile union pci_bank *const bank = is_peg ? pci_map_bus(dev) : dmibar;
++
++ const size_t bundles = (is_peg ? 8 : 2) * BUNDLE_STEP;
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP) {
++ /* These are actually per-lane */
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0xa00 + i, 0x1f, 0x0c, 0, always);
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0xa10 + i, 0x1f, 0x0c, 0, always);
++ }
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x904 + i, 0x1f, 0x02, 0, is_peg);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x904 + i, 0x1f, 0x03, 5, is_peg);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x90c + i, 0x3f, 0x09, 5, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x90c + i, 0x0f, 0x05, 21, is_peg);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x910 + i, 0x0f, 0x08, 6, is_peg);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x910 + i, 0x0f, 0x00, 10, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x910 + i, 0x07, 0x00, 18, always);
++
++ peg_dmi_unset_and_set_mask_vcu_mmio(0x0c008001, 0x1f, 0x03, 25, is_peg);
++ peg_dmi_unset_and_set_mask_vcu_mmio(0x0c0c8001, 0x3f, 0x00, 23, is_dmi);
++
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0xc28, 0x1f, 0x13, 18, always);
++
++ peg_dmi_unset_and_set_mask_common(is_peg, 0xc38, 0x01, 0x00, 6, always);
++ peg_dmi_unset_and_set_mask_common(is_peg, 0x260, 0x03, 0x02, 0, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x900 + i, 0x03, 0x00, 26, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x904 + i, 0x03, 0x03, 10, always);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x90c + i, 0x1f, 0x07, 25, is_peg);
++
++ for (size_t i = 0; i < bundles; i += BUNDLE_STEP)
++ peg_dmi_unset_and_set_mask_pcicfg(bank, 0x91c + i, 0x07, 0x05, 27, is_peg);
++}
+diff --git a/src/northbridge/intel/haswell/haswell.h b/src/northbridge/intel/haswell/haswell.h
+index 1b29f6baf0..30b4abd0a7 100644
+--- a/src/northbridge/intel/haswell/haswell.h
++++ b/src/northbridge/intel/haswell/haswell.h
+@@ -34,6 +34,9 @@ void haswell_early_initialization(void);
+ void haswell_late_initialization(void);
+ void haswell_unhide_peg(void);
+
++void dmi_early_init(void);
++void peg_dmi_recipe(const bool is_peg, const pci_devfn_t dev);
++
+ void report_platform_info(void);
+
+ struct acpi_rsdp;
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index 1aafdf8659..0938e026e3 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -1,7 +1,19 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
+ #include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
+ #include <northbridge/intel/haswell/raminit.h>
++#include <types.h>
++
++static bool early_init_native(int s3resume)
++{
++ printk(BIOS_DEBUG, "Starting native platform initialisation\n");
++
++ if (!CONFIG(INTEL_LYNXPOINT_LP))
++ dmi_early_init();
++
++ return false;
++}
+
+ void perform_raminit(const int s3resume)
+ {
+@@ -9,6 +21,9 @@ void perform_raminit(const int s3resume)
+ * See, this function's name is a lie. There are more things to
+ * do that memory initialisation, but they are relatively easy.
+ */
++ const bool cpu_replaced = early_init_native(s3resume);
++
++ (void)cpu_replaced;
+
+ /** TODO: Implement the required magic **/
+ die("NATIVE RAMINIT: More Magic (tm) required.\n");
+diff --git a/src/northbridge/intel/haswell/vcu_mailbox.c b/src/northbridge/intel/haswell/vcu_mailbox.c
+new file mode 100644
+index 0000000000..aead144023
+--- /dev/null
++++ b/src/northbridge/intel/haswell/vcu_mailbox.c
+@@ -0,0 +1,147 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/vcu_mailbox.h>
++#include <stdint.h>
++
++/*
++ * This is a library for the VCU (Validation Control Unit) mailbox. This
++ * mailbox is primarily used to adjust some magic PCIe tuning parameters.
++ *
++ * There are two revisions of the VCU mailbox. Rev1 is specific to Haswell
++ * stepping A0, and all other steppings use Rev2. Haswell stepping A0 CPUs
++ * are early Engineering Samples with undocumented errata, and most likely
++ * need special microcode updates to boot. Thus, the code does not support
++ * VCU mailbox Rev1, because no one should need it anymore.
++ */
++
++#define VCU_MAILBOX_INTERFACE 0x6c00
++#define VCU_MAILBOX_DATA 0x6c04
++
++#define VCU_RUN_BUSY (1 << 31)
++
++enum vcu_opcode {
++ VCU_OPCODE_READ_VCU_API_VER_ID = 0x01,
++ VCU_OPCODE_OPEN_SEQ = 0x02,
++ VCU_OPCODE_CLOSE_SEQ = 0x03,
++ VCU_OPCODE_READ_DATA = 0x07,
++ VCU_OPCODE_WRITE_DATA = 0x08,
++ VCU_OPCODE_READ_CSR = 0x13,
++ VCU_OPCODE_WRITE_CSR = 0x14,
++ VCU_OPCODE_READ_MMIO = 0x15,
++ VCU_OPCODE_WRITE_MMIO = 0x16,
++};
++
++enum vcu_sequence {
++ SEQ_ID_READ_CSR = 0x1,
++ SEQ_ID_WRITE_CSR = 0x2,
++ SEQ_ID_READ_MMIO = 0x3,
++ SEQ_ID_WRITE_MMIO = 0x4,
++};
++
++#define VCU_RESPONSE_MASK 0xffff
++#define VCU_RESPONSE_SUCCESS 0x40
++#define VCU_RESPONSE_BUSY 0x80
++#define VCU_RESPONSE_THREAD_UNAVAILABLE 0x82
++#define VCU_RESPONSE_ILLEGAL 0x90
++
++/* FIXME: Use timer API */
++static void send_vcu_command(const enum vcu_opcode opcode, const uint32_t data)
++{
++ for (unsigned int i = 0; i < 10; i++) {
++ mchbar_write32(VCU_MAILBOX_DATA, data);
++ mchbar_write32(VCU_MAILBOX_INTERFACE, opcode | VCU_RUN_BUSY);
++ uint32_t vcu_interface;
++ for (unsigned int j = 0; j < 100; j++) {
++ vcu_interface = mchbar_read32(VCU_MAILBOX_INTERFACE);
++ if (!(vcu_interface & VCU_RUN_BUSY))
++ break;
++
++ udelay(10);
++ }
++ if (vcu_interface & VCU_RUN_BUSY)
++ continue;
++
++ if ((vcu_interface & VCU_RESPONSE_MASK) == VCU_RESPONSE_SUCCESS)
++ return;
++ }
++ printk(BIOS_ERR, "VCU: Failed to send command\n");
++}
++
++static enum vcu_opcode get_register_opcode(enum vcu_sequence seq)
++{
++ switch (seq) {
++ case SEQ_ID_READ_CSR:
++ return VCU_OPCODE_READ_CSR;
++ case SEQ_ID_WRITE_CSR:
++ return VCU_OPCODE_WRITE_CSR;
++ case SEQ_ID_READ_MMIO:
++ return VCU_OPCODE_READ_MMIO;
++ case SEQ_ID_WRITE_MMIO:
++ return VCU_OPCODE_WRITE_MMIO;
++ default:
++ return dead_code_t(enum vcu_opcode);
++ }
++}
++
++static enum vcu_opcode get_data_opcode(enum vcu_sequence seq)
++{
++ switch (seq) {
++ case SEQ_ID_READ_CSR:
++ case SEQ_ID_READ_MMIO:
++ return VCU_OPCODE_READ_DATA;
++ case SEQ_ID_WRITE_CSR:
++ case SEQ_ID_WRITE_MMIO:
++ return VCU_OPCODE_WRITE_DATA;
++ default:
++ return dead_code_t(enum vcu_opcode);
++ }
++}
++
++static uint32_t send_vcu_sequence(uint32_t addr, enum vcu_sequence seq, uint32_t wr_data)
++{
++ send_vcu_command(VCU_OPCODE_OPEN_SEQ, seq);
++
++ send_vcu_command(get_register_opcode(seq), addr);
++
++ send_vcu_command(get_data_opcode(seq), wr_data);
++
++ const uint32_t rd_data = mchbar_read32(VCU_MAILBOX_DATA);
++
++ send_vcu_command(VCU_OPCODE_CLOSE_SEQ, seq);
++
++ return rd_data;
++}
++
++uint32_t vcu_read_csr(uint32_t addr)
++{
++ return send_vcu_sequence(addr, SEQ_ID_READ_CSR, 0);
++}
++
++void vcu_write_csr(uint32_t addr, uint32_t data)
++{
++ send_vcu_sequence(addr, SEQ_ID_WRITE_CSR, data);
++}
++
++void vcu_update_csr(uint32_t addr, uint32_t andvalue, uint32_t orvalue)
++{
++ vcu_write_csr(addr, (vcu_read_csr(addr) & andvalue) | orvalue);
++}
++
++uint32_t vcu_read_mmio(uint32_t addr)
++{
++ return send_vcu_sequence(addr, SEQ_ID_READ_MMIO, 0);
++}
++
++void vcu_write_mmio(uint32_t addr, uint32_t data)
++{
++ send_vcu_sequence(addr, SEQ_ID_WRITE_MMIO, data);
++}
++
++void vcu_update_mmio(uint32_t addr, uint32_t andvalue, uint32_t orvalue)
++{
++ vcu_write_mmio(addr, (vcu_read_mmio(addr) & andvalue) | orvalue);
++}
+diff --git a/src/northbridge/intel/haswell/vcu_mailbox.h b/src/northbridge/intel/haswell/vcu_mailbox.h
+new file mode 100644
+index 0000000000..ba0a62e486
+--- /dev/null
++++ b/src/northbridge/intel/haswell/vcu_mailbox.h
+@@ -0,0 +1,16 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef HASWELL_VCU_MAILBOX_H
++#define HASWELL_VCU_MAILBOX_H
++
++#include <stdint.h>
++
++uint32_t vcu_read_csr(uint32_t addr);
++void vcu_write_csr(uint32_t addr, uint32_t data);
++void vcu_update_csr(uint32_t addr, uint32_t andvalue, uint32_t orvalue);
++
++uint32_t vcu_read_mmio(uint32_t addr);
++void vcu_write_mmio(uint32_t addr, uint32_t data);
++void vcu_update_mmio(uint32_t addr, uint32_t andvalue, uint32_t orvalue);
++
++#endif /* HASWELL_VCU_MAILBOX_H */
+diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc
+index 02022d348d..b8503ac8bc 100644
+--- a/src/southbridge/intel/lynxpoint/Makefile.inc
++++ b/src/southbridge/intel/lynxpoint/Makefile.inc
+@@ -37,6 +37,8 @@ bootblock-y += early_pch.c
+ romstage-y += early_usb.c early_me.c me_status.c early_pch.c
+ romstage-y += pmutil.c
+
++romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c
++
+ ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
+ romstage-y += lp_gpio.c
+ ramstage-y += lp_gpio.c
+diff --git a/src/southbridge/intel/lynxpoint/early_pch_native.c b/src/southbridge/intel/lynxpoint/early_pch_native.c
+new file mode 100644
+index 0000000000..c28ddfcf5d
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/early_pch_native.c
+@@ -0,0 +1,52 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <types.h>
++
++void pch_dmi_setup_physical_layer(void)
++{
++ /* FIXME: We need to make sure the SA supports Gen2 as well */
++ if ((RCBA32(0x21a4) & 0x0f) == 0x02) {
++ /* Set Gen 2 Common Clock N_FTS */
++ RCBA32_AND_OR(0x2340, ~0x00ff0000, 0x3a << 16);
++
++ /* Set Target Link Speed to DMI Gen2 */
++ RCBA8_AND_OR(DLCTL2, ~0x07, 0x02);
++ }
++}
++
++#define VC_ACTIVE (1U << 31)
++
++#define VCNEGPND (1 << 1)
++
++void pch_dmi_tc_vc_mapping(const u32 vc0, const u32 vc1, const u32 vcp, const u32 vcm)
++{
++ printk(BIOS_DEBUG, "Programming PCH DMI VC/TC mappings...\n");
++
++ RCBA32_AND_OR(CIR0050, ~(0xf << 20), 2 << 20);
++ if (vcp & VC_ACTIVE)
++ RCBA32_OR(CIR0050, 1 << 19 | 1 << 17);
++
++ RCBA32(CIR0050); /* Posted Write */
++
++ /* Use the same virtual channel mapping on both ends of the DMI link */
++ RCBA32(V0CTL) = vc0;
++ RCBA32(V1CTL) = vc1;
++ RCBA32(V1CTL); /* Posted Write */
++ RCBA32(VPCTL) = vcp;
++ RCBA32(VPCTL); /* Posted Write */
++ RCBA32(VMCTL) = vcm;
++
++ /* Lock the registers */
++ RCBA32_OR(CIR0050, 1U << 31);
++ RCBA32(CIR0050); /* Posted Write */
++
++ printk(BIOS_DEBUG, "Waiting for PCH DMI VC negotiation... ");
++ do {} while (RCBA16(V0STS) & VCNEGPND);
++ do {} while (RCBA16(V1STS) & VCNEGPND);
++ do {} while (RCBA16(VPSTS) & VCNEGPND);
++ do {} while (RCBA16(VMSTS) & VCNEGPND);
++ printk(BIOS_DEBUG, "done!\n");
++}
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index 7d9fc6d6af..b5e0c2a830 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -113,6 +113,9 @@ enum pch_platform_type {
+ PCH_TYPE_ULT = 5,
+ };
+
++void pch_dmi_setup_physical_layer(void);
++void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm);
++
+ void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+ void usb_ehci_disable(pci_devfn_t dev);
+ void usb_xhci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+@@ -406,9 +409,10 @@ void mainboard_config_rcba(void);
+
+ /* Southbridge IO BARs */
+
++#define PMBASE 0x40
+ #define GPIOBASE 0x48
+
+-#define PMBASE 0x40
++#define CIR0050 0x0050 /* 32bit */
+
+ #define RPC 0x0400 /* 32bit */
+ #define RPFN 0x0404 /* 32bit */
+@@ -431,6 +435,20 @@ void mainboard_config_rcba(void);
+ #define IOTR2 0x1e90 /* 64bit */
+ #define IOTR3 0x1e98 /* 64bit */
+
++#define V0CTL 0x2014 /* 32bit */
++#define V0STS 0x201a /* 16bit */
++
++#define V1CTL 0x2020 /* 32bit */
++#define V1STS 0x2026 /* 16bit */
++
++#define VPCTL 0x2030 /* 32bit */
++#define VPSTS 0x2038 /* 16bit */
++
++#define VMCTL 0x2040 /* 32bit */
++#define VMSTS 0x2048 /* 16bit */
++
++#define DLCTL2 0x21b0
++
+ #define TCTL 0x3000 /* 8bit */
+
+ #define NOINT 0
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch b/config/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch
new file mode 100644
index 00000000..28dbc02a
--- /dev/null
+++ b/config/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch
@@ -0,0 +1,148 @@
+From 98142e01fc8ebb3b762974e9e4de75e7f5c073b4 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 22:18:21 +0200
+Subject: [PATCH 04/26] haswell/lynxpoint: Add native early ME init
+
+Implement native early ME init for Lynx Point. This is only needed when
+MRC.bin is not used.
+
+Change-Id: If416e2078f139f26b4742c564b70e018725bf003
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/raminit_native.c | 17 ++++++++++-
+ src/southbridge/intel/lynxpoint/early_me.c | 30 ++++++++++++++++++-
+ src/southbridge/intel/lynxpoint/me.h | 7 +++--
+ 3 files changed, 50 insertions(+), 4 deletions(-)
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index 0938e026e3..6a002548c1 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -1,18 +1,24 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
+ #include <console/console.h>
++#include <delay.h>
+ #include <northbridge/intel/haswell/haswell.h>
+ #include <northbridge/intel/haswell/raminit.h>
++#include <southbridge/intel/lynxpoint/me.h>
+ #include <types.h>
+
+ static bool early_init_native(int s3resume)
+ {
+ printk(BIOS_DEBUG, "Starting native platform initialisation\n");
+
++ intel_early_me_init();
++ /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
++ const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
++
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+ dmi_early_init();
+
+- return false;
++ return cpu_replaced;
+ }
+
+ void perform_raminit(const int s3resume)
+@@ -25,6 +31,15 @@ void perform_raminit(const int s3resume)
+
+ (void)cpu_replaced;
+
++ /** TODO: Move after raminit */
++ if (intel_early_me_uma_size() > 0) {
++ /** TODO: Update status once raminit is implemented **/
++ uint8_t me_status = ME_INIT_STATUS_ERROR;
++ intel_early_me_init_done(me_status);
++ }
++
++ intel_early_me_status();
++
+ /** TODO: Implement the required magic **/
+ die("NATIVE RAMINIT: More Magic (tm) required.\n");
+ }
+diff --git a/src/southbridge/intel/lynxpoint/early_me.c b/src/southbridge/intel/lynxpoint/early_me.c
+index 947c570e16..07013c5539 100644
+--- a/src/southbridge/intel/lynxpoint/early_me.c
++++ b/src/southbridge/intel/lynxpoint/early_me.c
+@@ -1,11 +1,12 @@
+ /* SPDX-License-Identifier: GPL-2.0-only */
+
+ #include <arch/io.h>
++#include <cf9_reset.h>
+ #include <device/pci_ops.h>
+ #include <console/console.h>
+ #include <delay.h>
+ #include <halt.h>
+-
++#include <timer.h>
+ #include "me.h"
+ #include "pch.h"
+
+@@ -60,6 +61,33 @@ int intel_early_me_init(void)
+ return 0;
+ }
+
++bool intel_early_me_cpu_replacement_check(void)
++{
++ printk(BIOS_DEBUG, "ME: Checking whether CPU was replaced... ");
++
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 50);
++
++ union me_hfs2 hfs2;
++ do {
++ hfs2.raw = pci_read_config32(PCH_ME_DEV, PCI_ME_HFS2);
++ if (stopwatch_expired(&timer)) {
++ /* Assume CPU was replaced just in case */
++ printk(BIOS_DEBUG, "timed out, assuming CPU was replaced\n");
++ return true;
++ }
++ udelay(ME_DELAY);
++ } while (!hfs2.cpu_replaced_valid);
++
++ if (hfs2.warm_reset_request) {
++ printk(BIOS_DEBUG, "warm reset needed for dynamic fusing\n");
++ system_reset();
++ }
++
++ printk(BIOS_DEBUG, "%sreplaced\n", hfs2.cpu_replaced_sts ? "" : "not ");
++ return hfs2.cpu_replaced_sts;
++}
++
+ int intel_early_me_uma_size(void)
+ {
+ union me_uma uma = { .raw = pci_read_config32(PCH_ME_DEV, PCI_ME_UMA) };
+diff --git a/src/southbridge/intel/lynxpoint/me.h b/src/southbridge/intel/lynxpoint/me.h
+index fe8b0260c4..6990322651 100644
+--- a/src/southbridge/intel/lynxpoint/me.h
++++ b/src/southbridge/intel/lynxpoint/me.h
+@@ -177,14 +177,16 @@ union me_did {
+ union me_hfs2 {
+ struct __packed {
+ u32 bist_in_progress: 1;
+- u32 reserved1: 2;
++ u32 icc_prog_sts: 2;
+ u32 invoke_mebx: 1;
+ u32 cpu_replaced_sts: 1;
+ u32 mbp_rdy: 1;
+ u32 mfs_failure: 1;
+ u32 warm_reset_request: 1;
+ u32 cpu_replaced_valid: 1;
+- u32 reserved2: 4;
++ u32 reserved: 2;
++ u32 fw_upd_ipu: 1;
++ u32 reserved2: 1;
+ u32 mbp_cleared: 1;
+ u32 reserved3: 2;
+ u32 current_state: 8;
+@@ -338,6 +340,7 @@ void intel_me_status(union me_hfs hfs, union me_hfs2 hfs2);
+
+ void intel_early_me_status(void);
+ int intel_early_me_init(void);
++bool intel_early_me_cpu_replacement_check(void);
+ int intel_early_me_uma_size(void);
+ int intel_early_me_init_done(u8 status);
+
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch b/config/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch
new file mode 100644
index 00000000..d9c2570b
--- /dev/null
+++ b/config/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch
@@ -0,0 +1,783 @@
+From 9bfb8614dbf1d9800ef8251cb3d839bcdbe5577f Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 23:17:39 +0200
+Subject: [PATCH 05/26] sb/intel/lynxpoint: Add native USB init
+
+Implement native USB initialisation for Lynx Point. This is only needed
+when MRC.bin is not used.
+
+TO DO: Figure out how to deal with the FIXME's and TODO's lying around.
+
+Change-Id: Ie0fbeeca7b1ca1557173772d733fd2fa27703373
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/raminit_native.c | 3 +
+ src/southbridge/intel/lynxpoint/Makefile.inc | 2 +-
+ src/southbridge/intel/lynxpoint/early_usb.c | 11 -
+ .../intel/lynxpoint/early_usb_native.c | 584 ++++++++++++++++++
+ src/southbridge/intel/lynxpoint/pch.h | 49 ++
+ 5 files changed, 637 insertions(+), 12 deletions(-)
+ create mode 100644 src/southbridge/intel/lynxpoint/early_usb_native.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index 6a002548c1..ef61d4ee09 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -5,6 +5,7 @@
+ #include <northbridge/intel/haswell/haswell.h>
+ #include <northbridge/intel/haswell/raminit.h>
+ #include <southbridge/intel/lynxpoint/me.h>
++#include <southbridge/intel/lynxpoint/pch.h>
+ #include <types.h>
+
+ static bool early_init_native(int s3resume)
+@@ -15,6 +16,8 @@ static bool early_init_native(int s3resume)
+ /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
+ const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
+
++ early_usb_init();
++
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+ dmi_early_init();
+
+diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc
+index b8503ac8bc..0e1f2fe4eb 100644
+--- a/src/southbridge/intel/lynxpoint/Makefile.inc
++++ b/src/southbridge/intel/lynxpoint/Makefile.inc
+@@ -37,7 +37,7 @@ bootblock-y += early_pch.c
+ romstage-y += early_usb.c early_me.c me_status.c early_pch.c
+ romstage-y += pmutil.c
+
+-romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c
++romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c
+
+ ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
+ romstage-y += lp_gpio.c
+diff --git a/src/southbridge/intel/lynxpoint/early_usb.c b/src/southbridge/intel/lynxpoint/early_usb.c
+index a753681ce0..52e8ac17f8 100644
+--- a/src/southbridge/intel/lynxpoint/early_usb.c
++++ b/src/southbridge/intel/lynxpoint/early_usb.c
+@@ -4,17 +4,6 @@
+ #include <device/pci_def.h>
+ #include "pch.h"
+
+-/* HCD_INDEX == 2 selects 0:1a.0 (PCH_EHCI2), any other index
+- * selects 0:1d.0 (PCH_EHCI1) for usbdebug use.
+- */
+-#if CONFIG_USBDEBUG_HCD_INDEX != 2
+-#define PCH_EHCI1_TEMP_BAR0 CONFIG_EHCI_BAR
+-#define PCH_EHCI2_TEMP_BAR0 (PCH_EHCI1_TEMP_BAR0 + 0x400)
+-#else
+-#define PCH_EHCI2_TEMP_BAR0 CONFIG_EHCI_BAR
+-#define PCH_EHCI1_TEMP_BAR0 (PCH_EHCI2_TEMP_BAR0 + 0x400)
+-#endif
+-
+ /*
+ * Setup USB controller MMIO BAR to prevent the
+ * reference code from resetting the controller.
+diff --git a/src/southbridge/intel/lynxpoint/early_usb_native.c b/src/southbridge/intel/lynxpoint/early_usb_native.c
+new file mode 100644
+index 0000000000..cb6f6ee8e6
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/early_usb_native.c
+@@ -0,0 +1,584 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <device/mmio.h>
++#include <device/pci_def.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/raminit.h>
++#include <southbridge/intel/lynxpoint/iobp.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <timer.h>
++#include <types.h>
++
++static unsigned int is_usbr_enabled(void)
++{
++ return !!(pci_read_config32(PCH_XHCI_DEV, XHCI_USB3FUS) & BIT(5));
++}
++
++static char *const xhci_bar = (char *)PCH_XHCI_TEMP_BAR0;
++
++static void ehci_hcs_init(const pci_devfn_t dev, const uintptr_t ehci_bar)
++{
++ pci_write_config32(dev, PCI_BASE_ADDRESS_0, ehci_bar);
++
++ /** FIXME: Determine whether Bus Master is required (or clean it up afterwards) **/
++ pci_or_config16(dev, PCI_COMMAND, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY);
++
++ char *const mem_bar = (char *)ehci_bar;
++
++ /**
++ * Shared EHCI/XHCI ports w/a.
++ * This step is required when some of the ports are routed to EHCI
++ * and other ports are routed XHCI at the same time.
++ *
++ * FIXME: Under which conditions should this be done?
++ */
++ pci_and_config16(dev, 0x78, ~0x03);
++
++ /* Skip reset if usbdebug is enabled */
++ if (!CONFIG(USBDEBUG_IN_PRE_RAM))
++ setbits32(mem_bar + EHCI_USB_CMD, EHCI_USB_CMD_HCRESET);
++
++ /* 2: Configure number of controllers and ports */
++ pci_or_config16(dev, EHCI_ACCESS_CNTL, ACCESS_CNTL_ENABLE);
++ clrsetbits32(mem_bar + EHCI_HCS_PARAMS, 0xf << 12, 0);
++ clrsetbits32(mem_bar + EHCI_HCS_PARAMS, 0xf << 0, 2 + is_usbr_enabled());
++ pci_and_config16(dev, EHCI_ACCESS_CNTL, ~ACCESS_CNTL_ENABLE);
++
++ pci_or_config16(dev, 0x78, BIT(2));
++ pci_or_config16(dev, 0x7c, BIT(14) | BIT(7));
++ pci_update_config32(dev, 0x8c, ~(0xf << 8), (4 << 8));
++ pci_update_config32(dev, 0x8c, ~BIT(26), BIT(17));
++}
++
++static inline unsigned int physical_port_count(void)
++{
++ return MAX_USB2_PORTS;
++}
++
++static unsigned int hs_port_count(void)
++{
++ /** TODO: Apparently, WPT-LP has 10 USB2 ports **/
++ if (CONFIG(INTEL_LYNXPOINT_LP))
++ return 8;
++
++ switch ((pci_read_config32(PCH_XHCI_DEV, XHCI_USB3FUS) >> 1) & 3) {
++ case 3:
++ return 8;
++ case 2:
++ return 10;
++ case 1:
++ return 12;
++ case 0:
++ default:
++ return 14;
++ }
++}
++
++static unsigned int ss_port_count(void)
++{
++ if (CONFIG(INTEL_LYNXPOINT_LP))
++ return 4;
++
++ switch ((pci_read_config32(PCH_XHCI_DEV, XHCI_USB3FUS) >> 3) & 3) {
++ case 3:
++ return 0;
++ case 2:
++ return 2;
++ case 1:
++ return 4;
++ case 0:
++ default:
++ return 6;
++ }
++}
++
++static void common_ehci_hcs_init(void)
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);
++
++ ehci_hcs_init(PCH_EHCI1_DEV, PCH_EHCI1_TEMP_BAR0);
++ if (!is_lp)
++ ehci_hcs_init(PCH_EHCI2_DEV, PCH_EHCI2_TEMP_BAR0);
++
++ pch_iobp_update(0xe5007f04, 0, 0x00004481);
++
++ for (unsigned int port = 0; port < physical_port_count(); port++)
++ pch_iobp_update(0xe500400f + port * 0x100, ~(1 << 0), 0 << 0);
++
++ pch_iobp_update(0xe5007f14, ~(3 << 19), (3 << 19));
++
++ if (is_lp)
++ pch_iobp_update(0xe5007f02, ~(3 << 22), (0 << 22));
++}
++
++static void xhci_open_memory_space(void)
++{
++ /** FIXME: Determine whether Bus Master is required (or clean it up afterwards) **/
++ pci_write_config32(PCH_XHCI_DEV, PCI_BASE_ADDRESS_0, (uintptr_t)xhci_bar);
++ pci_or_config16(PCH_XHCI_DEV, PCI_COMMAND, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY);
++}
++
++static void xhci_close_memory_space(void)
++{
++ pci_and_config16(PCH_XHCI_DEV, PCI_COMMAND, ~(PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY));
++ pci_write_config32(PCH_XHCI_DEV, PCI_BASE_ADDRESS_0, 0);
++}
++
++static void common_xhci_hc_init(void)
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);
++
++ if (!is_lp) {
++ const unsigned int max_ports = 15 + ss_port_count();
++ clrsetbits32(xhci_bar + XHCI_HCS_PARAMS_1, 0xf << 28, max_ports << 28);
++ }
++
++ clrsetbits32(xhci_bar + XHCI_HCS_PARAMS_3, 0xffff << 16 | 0xff, 0x200 << 16 | 0x0a);
++ clrsetbits32(xhci_bar + XHCI_HCC_PARAMS, BIT(5), BIT(10) | BIT(9));
++
++ if (!is_lp)
++ clrsetbits32(xhci_bar + 0x8008, BIT(19), 0);
++
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x8058, BIT(8), BIT(16));
++ else
++ clrsetbits32(xhci_bar + 0x8058, BIT(8), BIT(16) | BIT(20));
++
++ clrsetbits32(xhci_bar + 0x8060, 0, BIT(25) | BIT(18));
++ clrsetbits32(xhci_bar + 0x8090, 0, BIT(14) | BIT(8));
++ clrsetbits32(xhci_bar + 0x8094, 0, BIT(23) | BIT(21) | BIT(14));
++ clrsetbits32(xhci_bar + 0x80e0, BIT(16), BIT(6));
++ clrsetbits32(xhci_bar + 0x80ec, (7 << 12) | (7 << 9), (0 << 12) | (6 << 9));
++ clrsetbits32(xhci_bar + 0x80f0, BIT(20), 0);
++
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x80fc, 0, BIT(25));
++
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x8110, BIT(8) | BIT(2), BIT(20) | BIT(11));
++ else
++ clrsetbits32(xhci_bar + 0x8110, BIT(2), BIT(20) | BIT(11));
++
++ if (is_lp)
++ write32(xhci_bar + 0x8140, 0xff00f03c);
++ else
++ write32(xhci_bar + 0x8140, 0xff03c132);
++
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x8154, BIT(21), BIT(13));
++ else
++ clrsetbits32(xhci_bar + 0x8154, BIT(21) | BIT(13), 0);
++
++ clrsetbits32(xhci_bar + 0x8154, BIT(3), 0);
++
++ if (is_lp) {
++ clrsetbits32(xhci_bar + 0x8164, 0, BIT(1) | BIT(0));
++ write32(xhci_bar + 0x8174, 0x01400c0a);
++ write32(xhci_bar + 0x817c, 0x033200a3);
++ write32(xhci_bar + 0x8180, 0x00cb0028);
++ write32(xhci_bar + 0x8184, 0x0064001e);
++ }
++
++ /*
++ * Note: Register at offset 0x44 is 32-bit, but bit 31 is write-once.
++ * We use these weird partial accesses here to avoid locking bit 31.
++ */
++ pci_or_config16(PCH_XHCI_DEV, 0x44, BIT(15) | BIT(14) | BIT(10) | BIT(0));
++ pci_or_config8(PCH_XHCI_DEV, 0x44 + 2, 0x0f);
++
++ /* LPT-LP >= B0 */
++ if (is_lp)
++ clrsetbits32(xhci_bar + 0x8188, 0, BIT(26) | BIT(24));
++
++ /* LPT-H >= C0 */
++ if (!is_lp)
++ clrsetbits32(xhci_bar + 0x8188, 0, BIT(24));
++}
++
++static inline bool is_mem_sr(void)
++{
++ return pci_read_config16(PCH_LPC_DEV, GEN_PMCON_2) & GEN_PMCON_2_MEM_SR;
++}
++
++static bool should_restore_xhci_smart_auto(void)
++{
++ if (!is_mem_sr())
++ return false;
++
++ return pci_read_config32(PCH_LPC_DEV, PMIR) & PMIR_XHCI_SMART_AUTO;
++}
++
++enum usb_port_route {
++ ROUTE_TO_EHCI,
++ ROUTE_TO_XHCI,
++};
++
++/* Returns whether port reset was successful */
++static bool reset_usb2_ports(const unsigned int ehci_ports)
++{
++ for (unsigned int port = 0; port < ehci_ports; port++) {
++ /* Initiate port reset for all USB2 ports */
++ clrsetbits32(
++ xhci_bar + XHCI_USB2_PORTSC(port),
++ XHCI_USB2_PORTSC_PED,
++ XHCI_USB2_PORTSC_PR);
++ }
++ /* Poll for port reset bit to be cleared or time out at 100ms */
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 100);
++ uint32_t reg32;
++ do {
++ reg32 = 0;
++ for (unsigned int port = 0; port < ehci_ports; port++)
++ reg32 |= read32(xhci_bar + XHCI_USB2_PORTSC(port));
++
++ reg32 &= XHCI_USB2_PORTSC_PR;
++ if (!reg32) {
++ const long elapsed_time = stopwatch_duration_usecs(&timer);
++ printk(BIOS_DEBUG, "%s: took %lu usecs\n", __func__, elapsed_time);
++ return true;
++ }
++ /* Reference code has a 10 ms delay here, but a smaller delay works too */
++ udelay(100);
++ } while (!stopwatch_expired(&timer));
++ printk(BIOS_ERR, "%s: timed out\n", __func__);
++ return !reg32;
++}
++
++/* Returns whether warm reset was successful */
++static bool warm_reset_usb3_ports(const unsigned int xhci_ports)
++{
++ for (unsigned int port = 0; port < xhci_ports; port++) {
++ /* Initiate warm reset for all USB3 ports */
++ clrsetbits32(
++ xhci_bar + XHCI_USB3_PORTSC(port),
++ XHCI_USB3_PORTSC_PED,
++ XHCI_USB3_PORTSC_WPR);
++ }
++ /* Poll for port reset bit to be cleared or time out at 100ms */
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 100);
++ uint32_t reg32;
++ do {
++ reg32 = 0;
++ for (unsigned int port = 0; port < xhci_ports; port++)
++ reg32 |= read32(xhci_bar + XHCI_USB3_PORTSC(port));
++
++ reg32 &= XHCI_USB3_PORTSC_PR;
++ if (!reg32) {
++ const long elapsed_time = stopwatch_duration_usecs(&timer);
++ printk(BIOS_DEBUG, "%s: took %lu usecs\n", __func__, elapsed_time);
++ return true;
++ }
++ /* Reference code has a 10 ms delay here, but a smaller delay works too */
++ udelay(100);
++ } while (!stopwatch_expired(&timer));
++ printk(BIOS_ERR, "%s: timed out\n", __func__);
++ return !reg32;
++}
++
++static void perform_xhci_ehci_switching_flow(const enum usb_port_route usb_route)
++{
++ const pci_devfn_t dev = PCH_XHCI_DEV;
++
++ const unsigned int ehci_ports = hs_port_count() + is_usbr_enabled();
++ const unsigned int xhci_ports = ss_port_count();
++
++ const uint32_t ehci_mask = BIT(ehci_ports) - 1;
++ const uint32_t xhci_mask = BIT(xhci_ports) - 1;
++
++ /** TODO: Handle USBr port? How, though? **/
++ pci_update_config32(dev, XHCI_USB2PRM, ~XHCI_USB2PR_HCSEL, ehci_mask);
++ pci_update_config32(dev, XHCI_USB3PRM, ~XHCI_USB3PR_SSEN, xhci_mask);
++
++ /*
++ * Workaround for USB2PR / USB3PR value not surviving warm reset.
++ * Restore USB Port Routing registers if OS HC Switch driver has been executed.
++ */
++ if (should_restore_xhci_smart_auto()) {
++ /** FIXME: Derive values from mainboard code instead? **/
++ pci_update_config32(dev, XHCI_USB2PR, ~XHCI_USB2PR_HCSEL, ehci_mask);
++ pci_update_config32(dev, XHCI_USB3PR, ~XHCI_USB3PR_SSEN, xhci_mask);
++ }
++
++ /* Later stages shouldn't need the value of this bit */
++ pci_and_config32(PCH_LPC_DEV, PMIR, ~PMIR_XHCI_SMART_AUTO);
++
++ /**
++ * FIXME: Things here depend on the chosen routing mode.
++ * For now, implement both functions.
++ */
++
++ /* Route to EHCI if xHCI disabled or auto mode */
++ if (usb_route == ROUTE_TO_EHCI) {
++ if (!reset_usb2_ports(ehci_ports))
++ printk(BIOS_ERR, "USB2 port reset timed out\n");
++
++ pci_and_config32(dev, XHCI_USB2PR, ~XHCI_USB2PR_HCSEL);
++
++ for (unsigned int port = 0; port < ehci_ports; port++) {
++ clrsetbits32(
++ xhci_bar + XHCI_USB2_PORTSC(port),
++ XHCI_USB2_PORTSC_PED,
++ XHCI_USB2_PORTSC_CHST);
++ }
++
++ if (!warm_reset_usb3_ports(xhci_ports))
++ printk(BIOS_ERR, "USB3 warm reset timed out\n");
++
++ /* FIXME: BWG says this should be inside the warm reset function */
++ pci_and_config32(dev, XHCI_USB3PR, ~XHCI_USB3PR_SSEN);
++
++ for (unsigned int port = 0; port < ehci_ports; port++) {
++ clrsetbits32(
++ xhci_bar + XHCI_USB3_PORTSC(port),
++ XHCI_USB3_PORTSC_PED,
++ XHCI_USB3_PORTSC_CHST);
++ }
++
++ setbits32(xhci_bar + XHCI_USBCMD, BIT(0));
++ clrbits32(xhci_bar + XHCI_USBCMD, BIT(0));
++ }
++
++ /* Route to xHCI if xHCI enabled */
++ if (usb_route == ROUTE_TO_XHCI) {
++ if (is_mem_sr()) {
++ if (!warm_reset_usb3_ports(xhci_ports))
++ printk(BIOS_ERR, "USB3 warm reset timed out\n");
++ }
++
++ const uint32_t xhci_port_mask = pci_read_config32(dev, XHCI_USB3PRM) & 0x3f;
++ pci_update_config32(dev, XHCI_USB3PR, ~XHCI_USB3PR_SSEN, xhci_port_mask);
++
++ const uint32_t ehci_port_mask = pci_read_config32(dev, XHCI_USB2PRM) & 0x7fff;
++ pci_update_config32(dev, XHCI_USB2PR, ~XHCI_USB2PR_HCSEL, ehci_port_mask);
++ }
++}
++
++/* Do not shift in this macro, as it can cause undefined behaviour for bad port/oc values */
++#define PORT_TO_OC_SHIFT(port, oc) ((oc) * 8 + (port))
++
++/* Avoid shifting into undefined behaviour */
++static inline bool shift_ok(const int shift)
++{
++ return shift >= 0 && shift < 32;
++}
++
++static void usb_overcurrent_mapping(void)
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);
++
++ uint32_t ehci_1_ocmap = 0;
++ uint32_t ehci_2_ocmap = 0;
++ uint32_t xhci_1_ocmap = 0;
++ uint32_t xhci_2_ocmap = 0;
++
++ /*
++ * EHCI
++ */
++ for (unsigned int idx = 0; idx < physical_port_count(); idx++) {
++ const struct usb2_port_config *const port = &mainboard_usb2_ports[idx];
++ printk(BIOS_DEBUG, "USB2 port %u => ", idx);
++ if (!port->enable) {
++ printk(BIOS_DEBUG, "disabled\n");
++ continue;
++ }
++ const unsigned short oc_pin = port->oc_pin;
++ if (oc_pin == USB_OC_PIN_SKIP) {
++ printk(BIOS_DEBUG, "not mapped to OC pin\n");
++ continue;
++ }
++ /* Ports 0 .. 7 => OC 0 .. 3 */
++ if (idx < 8 && oc_pin <= 3) {
++ const int shift = PORT_TO_OC_SHIFT(idx, oc_pin);
++ if (shift_ok(shift)) {
++ printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin);
++ ehci_1_ocmap |= 1 << shift;
++ continue;
++ }
++ }
++ /* Ports 8 .. 13 => OC 4 .. 7 (LPT-H only) */
++ if (!is_lp && idx >= 8 && oc_pin >= 4) {
++ const int shift = PORT_TO_OC_SHIFT(idx, oc_pin - 4);
++ if (shift_ok(shift)) {
++ printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin);
++ ehci_2_ocmap |= 1 << shift;
++ continue;
++ }
++ }
++ printk(BIOS_ERR, "Invalid OC pin %u for USB2 port %u\n", oc_pin, idx);
++ }
++ printk(BIOS_DEBUG, "\n");
++ pci_write_config32(PCH_EHCI1_DEV, EHCI_OCMAP, ehci_1_ocmap);
++ if (!is_lp)
++ pci_write_config32(PCH_EHCI2_DEV, EHCI_OCMAP, ehci_2_ocmap);
++
++ /*
++ * xHCI
++ */
++ for (unsigned int idx = 0; idx < ss_port_count(); idx++) {
++ const struct usb3_port_config *const port = &mainboard_usb3_ports[idx];
++ printk(BIOS_DEBUG, "USB3 port %u => ", idx);
++ if (!port->enable) {
++ printk(BIOS_DEBUG, "disabled\n");
++ continue;
++ }
++ const unsigned short oc_pin = port->oc_pin;
++ if (oc_pin == USB_OC_PIN_SKIP) {
++ printk(BIOS_DEBUG, "not mapped to OC pin\n");
++ continue;
++ }
++ /* Ports 0 .. 5 => OC 0 .. 3 */
++ if (oc_pin <= 3) {
++ const int shift = PORT_TO_OC_SHIFT(idx, oc_pin);
++ if (shift_ok(shift)) {
++ printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin);
++ xhci_1_ocmap |= 1 << shift;
++ continue;
++ }
++ }
++ /* Ports 0 .. 5 => OC 4 .. 7 (LPT-H only) */
++ if (!is_lp && oc_pin >= 4) {
++ const int shift = PORT_TO_OC_SHIFT(idx, oc_pin - 4);
++ if (shift_ok(shift)) {
++ printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin);
++ xhci_2_ocmap |= 1 << shift;
++ continue;
++ }
++ }
++ printk(BIOS_ERR, "Invalid OC pin %u for USB3 port %u\n", oc_pin, idx);
++ }
++ printk(BIOS_DEBUG, "\n");
++ pci_write_config32(PCH_XHCI_DEV, XHCI_U2OCM1, ehci_1_ocmap);
++ pci_write_config32(PCH_XHCI_DEV, XHCI_U3OCM1, xhci_1_ocmap);
++ if (!is_lp) {
++ pci_write_config32(PCH_XHCI_DEV, XHCI_U2OCM2, ehci_2_ocmap);
++ pci_write_config32(PCH_XHCI_DEV, XHCI_U3OCM2, xhci_2_ocmap);
++ }
++}
++
++static uint8_t get_ehci_tune_param_1(const struct usb2_port_config *const port)
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);
++
++ const enum pch_platform_type plat_type = get_pch_platform_type();
++ const enum usb2_port_location location = port->location;
++ const uint16_t length = port->length;
++ if (!is_lp) {
++ if (plat_type == PCH_TYPE_DESKTOP) {
++ if (location == USB_PORT_BACK_PANEL)
++ return 4; /* Back Panel */
++ else
++ return 3; /* Front Panel */
++
++ } else if (plat_type == PCH_TYPE_MOBILE) {
++ if (location == USB_PORT_INTERNAL)
++ return 5; /* Internal Topology */
++ else if (location == USB_PORT_DOCK)
++ return 4; /* Dock */
++ else if (length < 0x70)
++ return 5; /* Back Panel, less than 7" */
++ else
++ return 6; /* Back Panel, 7" or more */
++ }
++ } else {
++ if (location == USB_PORT_BACK_PANEL || location == USB_PORT_MINI_PCIE) {
++ if (length < 0x70)
++ return 5; /* Back Panel, less than 7" */
++ else
++ return 6; /* Back Panel, 7" or more */
++ } else if (location == USB_PORT_DOCK) {
++ return 4; /* Dock */
++ } else {
++ return 5; /* Internal Topology */
++ }
++ }
++ printk(BIOS_ERR, "%s: Unhandled case\n", __func__);
++ return 0;
++}
++
++static uint8_t get_ehci_tune_param_2(const struct usb2_port_config *const port)
++{
++ const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);
++
++ const enum pch_platform_type plat_type = get_pch_platform_type();
++ const enum usb2_port_location location = port->location;
++ const uint16_t length = port->length;
++ if (!is_lp) {
++ if (plat_type == PCH_TYPE_DESKTOP) {
++ if (location == USB_PORT_BACK_PANEL) {
++ if (length < 0x80)
++ return 2; /* Back Panel, less than 8" */
++ else if (length < 0x130)
++ return 3; /* Back Panel, 8"-13" */
++ else
++ return 4; /* Back Panel, 13" or more */
++ } else {
++ return 2; /* Front Panel */
++ }
++
++ } else if (plat_type == PCH_TYPE_MOBILE) {
++ if (location == USB_PORT_INTERNAL) {
++ return 2; /* Internal Topology */
++ } else if (location == USB_PORT_DOCK) {
++ if (length < 0x50)
++ return 1; /* Dock, less than 5" */
++ else
++ return 2; /* Dock, 5" or more */
++ } else {
++ if (length < 0x100)
++ return 2; /* Back Panel, less than 10" */
++ else
++ return 3; /* Back Panel, 10" or more */
++ }
++ }
++ } else {
++ if (location == USB_PORT_BACK_PANEL || location == USB_PORT_MINI_PCIE) {
++ if (length < 0x100)
++ return 2; /* Back Panel, less than 10" */
++ else
++ return 3; /* Back Panel, 10" or more */
++ } else if (location == USB_PORT_DOCK) {
++ if (length < 0x50)
++ return 1; /* Dock, less than 5" */
++ else
++ return 2; /* Dock, 5" or more */
++ } else {
++ return 2; /* Internal Topology */
++ }
++ }
++ printk(BIOS_ERR, "%s: Unhandled case\n", __func__);
++ return 0;
++}
++
++static void program_ehci_port_length(void)
++{
++ for (unsigned int port = 0; port < physical_port_count(); port++) {
++ if (!mainboard_usb2_ports[port].enable)
++ continue;
++ const uint32_t addr = 0xe5004000 + (port + 1) * 0x100;
++ const uint8_t param_1 = get_ehci_tune_param_1(&mainboard_usb2_ports[port]);
++ const uint8_t param_2 = get_ehci_tune_param_2(&mainboard_usb2_ports[port]);
++ pch_iobp_update(addr, ~0x7f00, param_2 << 11 | param_1 << 8);
++ }
++}
++
++void early_usb_init(void)
++{
++ /** TODO: Make this configurable? How do the modes affect usbdebug? **/
++ const enum usb_port_route usb_route = ROUTE_TO_XHCI;
++ ///(pd->boot_mode == 2 && pd->usb_xhci_on_resume) ? ROUTE_TO_XHCI : ROUTE_TO_EHCI;
++
++ common_ehci_hcs_init();
++ xhci_open_memory_space();
++ common_xhci_hc_init();
++ perform_xhci_ehci_switching_flow(usb_route);
++ usb_overcurrent_mapping();
++ program_ehci_port_length();
++ /** FIXME: USB per port control is missing, is it needed? **/
++ xhci_close_memory_space();
++ /** TODO: Close EHCI memory space? **/
++}
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index b5e0c2a830..ad983d86cf 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -115,6 +115,7 @@ enum pch_platform_type {
+
+ void pch_dmi_setup_physical_layer(void);
+ void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm);
++void early_usb_init(void);
+
+ void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+ void usb_ehci_disable(pci_devfn_t dev);
+@@ -202,6 +203,8 @@ void mainboard_config_rcba(void);
+ #define GEN_PMCON_1 0xa0
+ #define SMI_LOCK (1 << 4)
+ #define GEN_PMCON_2 0xa2
++#define GEN_PMCON_2_DISB (1 << 7)
++#define GEN_PMCON_2_MEM_SR (1 << 5)
+ #define SYSTEM_RESET_STS (1 << 4)
+ #define THERMTRIP_STS (1 << 3)
+ #define SYSPWR_FLR (1 << 1)
+@@ -215,6 +218,7 @@ void mainboard_config_rcba(void);
+ #define PMIR 0xac
+ #define PMIR_CF9LOCK (1 << 31)
+ #define PMIR_CF9GR (1 << 20)
++#define PMIR_XHCI_SMART_AUTO (1 << 16) /* c.f. LPT BWG or WPT-LP BIOS spec */
+
+ /* GEN_PMCON_3 bits */
+ #define RTC_BATTERY_DEAD (1 << 2)
+@@ -282,6 +286,20 @@ void mainboard_config_rcba(void);
+ #define SATA_DTLE_DATA_SHIFT 24
+ #define SATA_DTLE_EDGE_SHIFT 16
+
++/*
++ * HCD_INDEX == 2 selects 0:1a.0 (PCH_EHCI2), any other index
++ * selects 0:1d.0 (PCH_EHCI1) for usbdebug use.
++ */
++#if CONFIG_USBDEBUG_HCD_INDEX != 2
++#define PCH_EHCI1_TEMP_BAR0 CONFIG_EHCI_BAR
++#define PCH_EHCI2_TEMP_BAR0 (PCH_EHCI1_TEMP_BAR0 + 0x400)
++#else
++#define PCH_EHCI2_TEMP_BAR0 CONFIG_EHCI_BAR
++#define PCH_EHCI1_TEMP_BAR0 (PCH_EHCI2_TEMP_BAR0 + 0x400)
++#endif
++
++#define PCH_XHCI_TEMP_BAR0 0xe8100000
++
+ /* EHCI PCI Registers */
+ #define EHCI_PWR_CTL_STS 0x54
+ #define PWR_CTL_SET_MASK 0x3
+@@ -289,10 +307,15 @@ void mainboard_config_rcba(void);
+ #define PWR_CTL_SET_D3 0x3
+ #define PWR_CTL_ENABLE_PME (1 << 8)
+ #define PWR_CTL_STATUS_PME (1 << 15)
++#define EHCI_OCMAP 0x74
++#define EHCI_ACCESS_CNTL 0x80
++#define ACCESS_CNTL_ENABLE (1 << 0)
+
+ /* EHCI Memory Registers */
++#define EHCI_HCS_PARAMS 0x04
+ #define EHCI_USB_CMD 0x20
+ #define EHCI_USB_CMD_RUN (1 << 0)
++#define EHCI_USB_CMD_HCRESET (1 << 1)
+ #define EHCI_USB_CMD_PSE (1 << 4)
+ #define EHCI_USB_CMD_ASE (1 << 5)
+ #define EHCI_PORTSC(port) (0x64 + (port) * 4)
+@@ -301,6 +324,10 @@ void mainboard_config_rcba(void);
+
+ /* XHCI PCI Registers */
+ #define XHCI_PWR_CTL_STS 0x74
++#define XHCI_U2OCM1 0xc0
++#define XHCI_U2OCM2 0xc4
++#define XHCI_U3OCM1 0xc8
++#define XHCI_U3OCM2 0xcc
+ #define XHCI_USB2PR 0xd0
+ #define XHCI_USB2PRM 0xd4
+ #define XHCI_USB2PR_HCSEL 0x7fff
+@@ -313,6 +340,27 @@ void mainboard_config_rcba(void);
+ #define XHCI_USB3PDO 0xe8
+
+ /* XHCI Memory Registers */
++#define XHCI_HCS_PARAMS_1 0x04
++#define XHCI_HCS_PARAMS_2 0x08
++#define XHCI_HCS_PARAMS_3 0x0c
++#define XHCI_HCC_PARAMS 0x10
++#define XHCI_USBCMD 0x80
++#define XHCI_USB2_PORTSC(port) (0x480 + ((port) * 0x10))
++#define XHCI_USB2_PORTSC_WPR (1 << 31) /* Warm Port Reset */
++#define XHCI_USB2_PORTSC_CEC (1 << 23) /* Port Config Error Change */
++#define XHCI_USB2_PORTSC_PLC (1 << 22) /* Port Link State Change */
++#define XHCI_USB2_PORTSC_PRC (1 << 21) /* Port Reset Change */
++#define XHCI_USB2_PORTSC_OCC (1 << 20) /* Over-current Change */
++#define XHCI_USB2_PORTSC_WRC (1 << 19) /* Warm Port Reset Change */
++#define XHCI_USB2_PORTSC_PEC (1 << 18) /* Port Enabled Disabled Change */
++#define XHCI_USB2_PORTSC_CSC (1 << 17) /* Connect Status Change */
++#define XHCI_USB2_PORTSC_CHST (0x7f << 17)
++#define XHCI_USB2_PORTSC_LWS (1 << 16) /* Port Link State Write Strobe */
++#define XHCI_USB2_PORTSC_PP (1 << 9)
++#define XHCI_USB2_PORTSC_PR (1 << 4) /* Port Reset */
++#define XHCI_USB2_PORTSC_PED (1 << 1) /* Port Enable/Disabled */
++#define XHCI_USB2_PORTSC_CCS (1 << 0) /* Current Connect Status */
++
+ #define XHCI_USB3_PORTSC(port) ((pch_is_lp() ? 0x510 : 0x570) + ((port) * 0x10))
+ #define XHCI_USB3_PORTSC_CHST (0x7f << 17)
+ #define XHCI_USB3_PORTSC_WCE (1 << 25) /* Wake on Connect */
+@@ -320,6 +368,7 @@ void mainboard_config_rcba(void);
+ #define XHCI_USB3_PORTSC_WOE (1 << 27) /* Wake on Overcurrent */
+ #define XHCI_USB3_PORTSC_WRC (1 << 19) /* Warm Reset Complete */
+ #define XHCI_USB3_PORTSC_LWS (1 << 16) /* Link Write Strobe */
++#define XHCI_USB3_PORTSC_PR (1 << 4) /* Port Reset */
+ #define XHCI_USB3_PORTSC_PED (1 << 1) /* Port Enabled/Disabled */
+ #define XHCI_USB3_PORTSC_WPR (1 << 31) /* Warm Port Reset */
+ #define XHCI_USB3_PORTSC_PLS (0xf << 5) /* Port Link State */
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch b/config/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch
new file mode 100644
index 00000000..157d2999
--- /dev/null
+++ b/config/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch
@@ -0,0 +1,128 @@
+From 92be49d8422b4bc1c89bb49535f4dc6a01d47295 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 23:22:11 +0200
+Subject: [PATCH 06/26] sb/intel/lynxpoint: Add native thermal init
+
+Implement native thermal initialisation for Lynx Point. This is only
+needed when MRC.bin is not used.
+
+Change-Id: I4a67a3092d0c2e56bfdacb513a899ef838193cbd
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/raminit_native.c | 1 +
+ src/southbridge/intel/lynxpoint/Makefile.inc | 2 +-
+ src/southbridge/intel/lynxpoint/pch.h | 1 +
+ src/southbridge/intel/lynxpoint/thermal.c | 64 +++++++++++++++++++
+ 4 files changed, 67 insertions(+), 1 deletion(-)
+ create mode 100644 src/southbridge/intel/lynxpoint/thermal.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index ef61d4ee09..dd1f1ec14e 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -16,6 +16,7 @@ static bool early_init_native(int s3resume)
+ /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
+ const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
+
++ early_thermal_init();
+ early_usb_init();
+
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc
+index 0e1f2fe4eb..a9a9b153d6 100644
+--- a/src/southbridge/intel/lynxpoint/Makefile.inc
++++ b/src/southbridge/intel/lynxpoint/Makefile.inc
+@@ -37,7 +37,7 @@ bootblock-y += early_pch.c
+ romstage-y += early_usb.c early_me.c me_status.c early_pch.c
+ romstage-y += pmutil.c
+
+-romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c
++romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c thermal.c
+
+ ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
+ romstage-y += lp_gpio.c
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index ad983d86cf..38a9349220 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -116,6 +116,7 @@ enum pch_platform_type {
+ void pch_dmi_setup_physical_layer(void);
+ void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm);
+ void early_usb_init(void);
++void early_thermal_init(void);
+
+ void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+ void usb_ehci_disable(pci_devfn_t dev);
+diff --git a/src/southbridge/intel/lynxpoint/thermal.c b/src/southbridge/intel/lynxpoint/thermal.c
+new file mode 100644
+index 0000000000..e71969ea0c
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/thermal.c
+@@ -0,0 +1,64 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/mmio.h>
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <types.h>
++
++#define TBARB_TEMP 0x40000000
++
++#define THERMAL_DEV PCI_DEV(0, 0x1f, 6)
++
++/* Early thermal init, it may need to be done prior to giving ME its memory */
++void early_thermal_init(void)
++{
++ /* Program address for temporary BAR */
++ pci_write_config32(THERMAL_DEV, 0x40, TBARB_TEMP);
++ pci_write_config32(THERMAL_DEV, 0x44, 0);
++
++ /* Activate temporary BAR */
++ pci_or_config32(THERMAL_DEV, 0x40, 1);
++
++ /*
++ * BWG section 17.3.1 says:
++ *
++ * ### Initializing Lynx Point Thermal Sensors ###
++ *
++ * The System BIOS must perform the following steps to initialize the Lynx
++ * Point thermal subsystem device, D31:F6. The System BIOS is required to
++ * repeat this process on a resume from Sx. BIOS may enable any or all of
++ * the registers below based on OEM's platform configuration. Intel does
++ * not recommend a value on some of the registers, since each platform has
++ * different temperature trip points and one may enable a trip to cause an
++ * SMI while another platform would cause an interrupt instead.
++ *
++ * The recommended flow for enabling thermal sensor is by setting up various
++ * temperature trip points first, followed by enabling the desired trip
++ * alert method and then enable the actual sensors from TSEL registers.
++ * If this flow is not followed, software will need to take special care
++ * to handle false events during setting up those registers.
++ */
++
++ /* Step 1: Program CTT */
++ write16p(TBARB_TEMP + 0x10, 0x0154);
++
++ /* Step 2: Clear trip status from TSS and TAS */
++ write8p(TBARB_TEMP + 0x06, 0xff);
++ write8p(TBARB_TEMP + 0x80, 0xff);
++
++ /* Step 3: Program TSGPEN and TSPIEN to zero */
++ write8p(TBARB_TEMP + 0x84, 0x00);
++ write8p(TBARB_TEMP + 0x82, 0x00);
++
++ /*
++ * Step 4: If thermal reporting to an EC over SMBus is supported,
++ * then write 0x01 to TSREL, else leave at default.
++ */
++ write8p(TBARB_TEMP + 0x0a, 0x01);
++
++ /* Disable temporary BAR */
++ pci_and_config32(THERMAL_DEV, 0x40, ~1);
++
++ /* Clear temporary BAR address */
++ pci_write_config32(THERMAL_DEV, 0x40, 0);
++}
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0007-sb-intel-lynxpoint-Add-native-PCH-init.patch b/config/coreboot/haswell/patches/0007-sb-intel-lynxpoint-Add-native-PCH-init.patch
new file mode 100644
index 00000000..74427f5d
--- /dev/null
+++ b/config/coreboot/haswell/patches/0007-sb-intel-lynxpoint-Add-native-PCH-init.patch
@@ -0,0 +1,785 @@
+From 7378cb4fefc87b9a096bb14820a44f26f3a628f5 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Fri, 6 May 2022 23:43:46 +0200
+Subject: [PATCH 07/26] sb/intel/lynxpoint: Add native PCH init
+
+Implement native PCH initialisation for Lynx Point. This is only needed
+when MRC.bin is not used.
+
+Change-Id: I36867bdc8b20000e44ff9d0d7b2c0d63952bd561
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/raminit_native.c | 3 +-
+ src/southbridge/intel/lynxpoint/Makefile.inc | 1 +
+ .../intel/lynxpoint/early_pch_native.c | 123 +++++++++
+ .../intel/lynxpoint/hsio/Makefile.inc | 8 +
+ src/southbridge/intel/lynxpoint/hsio/common.c | 52 ++++
+ src/southbridge/intel/lynxpoint/hsio/hsio.h | 46 ++++
+ .../intel/lynxpoint/hsio/lpt_h_cx.c | 244 ++++++++++++++++++
+ .../intel/lynxpoint/hsio/lpt_lp_bx.c | 180 +++++++++++++
+ src/southbridge/intel/lynxpoint/pch.h | 6 +
+ 9 files changed, 661 insertions(+), 2 deletions(-)
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/Makefile.inc
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/common.c
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/hsio.h
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c
+ create mode 100644 src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index dd1f1ec14e..b6efb6b40d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -16,8 +16,7 @@ static bool early_init_native(int s3resume)
+ /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
+ const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
+
+- early_thermal_init();
+- early_usb_init();
++ early_pch_init_native(s3resume);
+
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+ dmi_early_init();
+diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc
+index a9a9b153d6..63243ecc86 100644
+--- a/src/southbridge/intel/lynxpoint/Makefile.inc
++++ b/src/southbridge/intel/lynxpoint/Makefile.inc
+@@ -38,6 +38,7 @@ romstage-y += early_usb.c early_me.c me_status.c early_pch.c
+ romstage-y += pmutil.c
+
+ romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c thermal.c
++subdirs-$(CONFIG_USE_NATIVE_RAMINIT) += hsio
+
+ ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
+ romstage-y += lp_gpio.c
+diff --git a/src/southbridge/intel/lynxpoint/early_pch_native.c b/src/southbridge/intel/lynxpoint/early_pch_native.c
+index c28ddfcf5d..421821fa5d 100644
+--- a/src/southbridge/intel/lynxpoint/early_pch_native.c
++++ b/src/southbridge/intel/lynxpoint/early_pch_native.c
+@@ -1,10 +1,133 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
+ #include <console/console.h>
++#include <device/pci_def.h>
+ #include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/hsio/hsio.h>
+ #include <southbridge/intel/lynxpoint/pch.h>
+ #include <types.h>
+
++static void early_sata_init(const uint8_t pch_revision)
++{
++ const bool is_mobile = get_pch_platform_type() != PCH_TYPE_DESKTOP;
++
++ const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410);
++ printk(BIOS_DEBUG, "HSIO lane owner: 0x%02x\n", lane_owner);
++
++ /* BWG Step 2 */
++ pci_update_config32(PCH_SATA_DEV, SATA_SCLKG, ~0x1ff, 0x183);
++
++ /* BWG Step 3: Set OOB Retry Mode */
++ pci_or_config16(PCH_SATA_DEV, SATA_PCS, 1 << 15);
++
++ /* BWG Step 4: Program the SATA mPHY tables */
++ if (pch_is_lp()) {
++ if (pch_revision >= LPT_LP_STEP_B0 && pch_revision <= LPT_LP_STEP_B2) {
++ program_hsio_sata_lpt_lp_bx(is_mobile);
++ } else {
++ printk(BIOS_ERR, "Unsupported PCH-LP stepping 0x%02x\n", pch_revision);
++ }
++ } else {
++ if (pch_revision >= LPT_H_STEP_C0) {
++ program_hsio_sata_lpt_h_cx(is_mobile);
++ } else {
++ printk(BIOS_ERR, "Unsupported PCH-H stepping 0x%02x\n", pch_revision);
++ }
++ }
++
++ /** FIXME: Program SATA RxEq tables **/
++
++ /* BWG Step 5 */
++ /** FIXME: Only for desktop and mobile (skip this on workstation and server) **/
++ pci_or_config32(PCH_SATA_DEV, 0x98, BIT(22));
++
++ /* BWG Step 6 */
++ pci_or_config32(PCH_SATA_DEV, 0x98, BIT(19));
++
++ /* BWG Step 7 */
++ pci_update_config32(PCH_SATA_DEV, 0x98, ~(0x3f << 7), 0x04 << 7);
++
++ /* BWG Step 8 */
++ pci_or_config32(PCH_SATA_DEV, 0x98, BIT(20));
++
++ /* BWG Step 9 */
++ pci_update_config32(PCH_SATA_DEV, 0x98, ~(3 << 5), 1 << 5);
++
++ /* BWG Step 10 */
++ pci_or_config32(PCH_SATA_DEV, 0x98, BIT(18));
++
++ /* Enable SATA ports */
++ uint8_t sata_pcs = 0;
++ if (CONFIG(INTEL_LYNXPOINT_LP)) {
++ for (uint8_t i = 0; i < 4; i++) {
++ if ((lane_owner & BIT(7 - i)) == 0) {
++ sata_pcs |= BIT(i);
++ }
++ }
++ } else {
++ sata_pcs |= 0x0f;
++ for (uint8_t i = 4; i < 6; i++) {
++ if ((lane_owner & BIT(i)) == 0) {
++ sata_pcs |= BIT(i);
++ }
++ }
++ }
++ printk(BIOS_DEBUG, "SATA port enables: 0x%02x\n", sata_pcs);
++ pci_or_config8(PCH_SATA_DEV, SATA_PCS, sata_pcs);
++}
++
++void early_pch_init_native(int s3resume)
++{
++ const uint8_t pch_revision = pci_read_config8(PCH_LPC_DEV, PCI_REVISION_ID);
++
++ RCBA16(DISPBDF) = 0x0010;
++ RCBA32_OR(FD2, PCH_ENABLE_DBDF);
++
++ /** FIXME: Check GEN_PMCON_3 and handle RTC failure? **/
++
++ RCBA32(PRSTS) = BIT(4);
++
++ early_sata_init(pch_revision);
++
++ pci_or_config8(PCH_LPC_DEV, 0xa6, 1 << 1);
++ pci_and_config8(PCH_LPC_DEV, 0xdc, ~(1 << 5 | 1 << 1));
++
++ /** TODO: Send GET HSIO VER and update ChipsetInit table? Is it needed? **/
++
++ /** FIXME: GbE handling? **/
++
++ pci_update_config32(PCH_LPC_DEV, 0xac, ~(1 << 20), 0);
++
++ for (uint8_t i = 0; i < 8; i++)
++ pci_update_config32(PCI_DEV(0, 0x1c, i), 0x338, ~(1 << 26), 0);
++
++ pci_update_config8(PCI_DEV(0, 0x1c, 0), 0xf4, ~(3 << 5), 1 << 7);
++
++ pci_update_config8(PCI_DEV(0, 26, 0), 0x88, ~(1 << 2), 0);
++ pci_update_config8(PCI_DEV(0, 29, 0), 0x88, ~(1 << 2), 0);
++
++ /** FIXME: Disable SATA2 device? **/
++
++ if (pch_is_lp()) {
++ if (pch_revision >= LPT_LP_STEP_B0 && pch_revision <= LPT_LP_STEP_B2) {
++ program_hsio_xhci_lpt_lp_bx();
++ program_hsio_igbe_lpt_lp_bx();
++ } else {
++ printk(BIOS_ERR, "Unsupported PCH-LP stepping 0x%02x\n", pch_revision);
++ }
++ } else {
++ if (pch_revision >= LPT_H_STEP_C0) {
++ program_hsio_xhci_lpt_h_cx();
++ program_hsio_igbe_lpt_h_cx();
++ } else {
++ printk(BIOS_ERR, "Unsupported PCH-H stepping 0x%02x\n", pch_revision);
++ }
++ }
++
++ early_thermal_init();
++ early_usb_init();
++}
++
+ void pch_dmi_setup_physical_layer(void)
+ {
+ /* FIXME: We need to make sure the SA supports Gen2 as well */
+diff --git a/src/southbridge/intel/lynxpoint/hsio/Makefile.inc b/src/southbridge/intel/lynxpoint/hsio/Makefile.inc
+new file mode 100644
+index 0000000000..6b74997511
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/Makefile.inc
+@@ -0,0 +1,8 @@
++## SPDX-License-Identifier: GPL-2.0-or-later
++
++romstage-y += common.c
++ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y)
++romstage-y += lpt_lp_bx.c
++else
++romstage-y += lpt_h_cx.c
++endif
+diff --git a/src/southbridge/intel/lynxpoint/hsio/common.c b/src/southbridge/intel/lynxpoint/hsio/common.c
+new file mode 100644
+index 0000000000..9935ca347a
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/common.c
+@@ -0,0 +1,52 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/hsio/hsio.h>
++#include <types.h>
++
++/*
++ * FIXME: Ask Intel whether all lanes need to be programmed as specified
++ * in the PCH BWG. If not, make separate tables and only check this once.
++ */
++void hsio_sata_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or)
++{
++ const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410);
++
++ if ((addr & 0xfe00) == 0x2000 && (lane_owner & (1 << 4)))
++ return;
++
++ if ((addr & 0xfe00) == 0x2200 && (lane_owner & (1 << 5)))
++ return;
++
++ if (CONFIG(INTEL_LYNXPOINT_LP)) {
++ if ((addr & 0xfe00) == 0x2400 && (lane_owner & (1 << 6)))
++ return;
++
++ if ((addr & 0xfe00) == 0x2600 && (lane_owner & (1 << 7)))
++ return;
++ }
++ hsio_update(addr, and, or);
++}
++
++/*
++ * FIXME: Ask Intel whether all lanes need to be programmed as specified
++ * in the PCH BWG. If not, make separate tables and only check this once.
++ */
++void hsio_xhci_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or)
++{
++ const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410);
++ if (CONFIG(INTEL_LYNXPOINT_LP)) {
++ if ((addr & 0xfe00) == 0x2400 && ((lane_owner >> 0) & 3) != 2)
++ return;
++
++ if ((addr & 0xfe00) == 0x2600 && ((lane_owner >> 2) & 3) != 2)
++ return;
++ } else {
++ if ((addr & 0xfe00) == 0x2c00 && ((lane_owner >> 2) & 3) != 2)
++ return;
++
++ if ((addr & 0xfe00) == 0x2e00 && ((lane_owner >> 0) & 3) != 2)
++ return;
++ }
++ hsio_update(addr, and, or);
++}
+diff --git a/src/southbridge/intel/lynxpoint/hsio/hsio.h b/src/southbridge/intel/lynxpoint/hsio/hsio.h
+new file mode 100644
+index 0000000000..689ef4a05b
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/hsio.h
+@@ -0,0 +1,46 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef SOUTHBRIDGE_INTEL_LYNXPOINT_HSIO_H
++#define SOUTHBRIDGE_INTEL_LYNXPOINT_HSIO_H
++
++#include <southbridge/intel/lynxpoint/iobp.h>
++#include <types.h>
++
++struct hsio_table_row {
++ uint32_t addr;
++ uint32_t and;
++ uint32_t or;
++};
++
++static inline void hsio_update(const uint32_t addr, const uint32_t and, const uint32_t or)
++{
++ pch_iobp_update(addr, and, or);
++}
++
++static inline void hsio_update_row(const struct hsio_table_row row)
++{
++ hsio_update(row.addr, row.and, row.or);
++}
++
++void hsio_xhci_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or);
++void hsio_sata_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or);
++
++static inline void hsio_sata_shared_update_row(const struct hsio_table_row row)
++{
++ hsio_sata_shared_update(row.addr, row.and, row.or);
++}
++
++static inline void hsio_xhci_shared_update_row(const struct hsio_table_row row)
++{
++ hsio_xhci_shared_update(row.addr, row.and, row.or);
++}
++
++void program_hsio_sata_lpt_h_cx(const bool is_mobile);
++void program_hsio_xhci_lpt_h_cx(void);
++void program_hsio_igbe_lpt_h_cx(void);
++
++void program_hsio_sata_lpt_lp_bx(const bool is_mobile);
++void program_hsio_xhci_lpt_lp_bx(void);
++void program_hsio_igbe_lpt_lp_bx(void);
++
++#endif
+diff --git a/src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c b/src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c
+new file mode 100644
+index 0000000000..b5dd402742
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c
+@@ -0,0 +1,244 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/hsio/hsio.h>
++#include <types.h>
++
++const struct hsio_table_row hsio_sata_shared_lpt_h_cx[] = {
++ { 0xea002008, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002208, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002038, ~0x3f00000f, 0x0700000d },
++ { 0xea002238, ~0x3f00000f, 0x0700000d },
++ { 0xea00202c, ~0x00020f00, 0x00020100 },
++ { 0xea00222c, ~0x00020f00, 0x00020100 },
++ { 0xea002040, ~0x1f000000, 0x01000000 },
++ { 0xea002240, ~0x1f000000, 0x01000000 },
++ { 0xea002010, ~0xffff0000, 0x0d510000 },
++ { 0xea002210, ~0xffff0000, 0x0d510000 },
++ { 0xea002018, ~0xffff0300, 0x38250100 },
++ { 0xea002218, ~0xffff0300, 0x38250100 },
++ { 0xea002000, ~0xcf030000, 0xcf030000 },
++ { 0xea002200, ~0xcf030000, 0xcf030000 },
++ { 0xea002028, ~0xff1f0000, 0x580e0000 },
++ { 0xea002228, ~0xff1f0000, 0x580e0000 },
++ { 0xea00201c, ~0x00007c00, 0x00002400 },
++ { 0xea00221c, ~0x00007c00, 0x00002400 },
++ { 0xea00208c, ~0x00ff0000, 0x00800000 },
++ { 0xea00228c, ~0x00ff0000, 0x00800000 },
++ { 0xea0020a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0022a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0020ac, ~0x00000030, 0x00000020 },
++ { 0xea0022ac, ~0x00000030, 0x00000020 },
++ { 0xea002140, ~0x00ffffff, 0x00140718 },
++ { 0xea002340, ~0x00ffffff, 0x00140718 },
++ { 0xea002144, ~0x00ffffff, 0x00140998 },
++ { 0xea002344, ~0x00ffffff, 0x00140998 },
++ { 0xea002148, ~0x00ffffff, 0x00140998 },
++ { 0xea002348, ~0x00ffffff, 0x00140998 },
++ { 0xea00217c, ~0x03000000, 0x03000000 },
++ { 0xea00237c, ~0x03000000, 0x03000000 },
++ { 0xea002178, ~0x00001f00, 0x00001800 },
++ { 0xea002378, ~0x00001f00, 0x00001800 },
++ { 0xea00210c, ~0x0038000f, 0x00000005 },
++ { 0xea00230c, ~0x0038000f, 0x00000005 },
++};
++
++const struct hsio_table_row hsio_sata_lpt_h_cx[] = {
++ { 0xea008008, ~0xff000000, 0x1c000000 },
++ { 0xea002408, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002608, ~0xfffc6108, 0xea6c6108 },
++ { 0xea000808, ~0xfffc6108, 0xea6c6108 },
++ { 0xea000a08, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002438, ~0x3f00000f, 0x0700000d },
++ { 0xea002638, ~0x3f00000f, 0x0700000d },
++ { 0xea000838, ~0x3f00000f, 0x0700000d },
++ { 0xea000a38, ~0x3f00000f, 0x0700000d },
++ { 0xea002440, ~0x1f000000, 0x01000000 },
++ { 0xea002640, ~0x1f000000, 0x01000000 },
++ { 0xea000840, ~0x1f000000, 0x01000000 },
++ { 0xea000a40, ~0x1f000000, 0x01000000 },
++ { 0xea002410, ~0xffff0000, 0x0d510000 },
++ { 0xea002610, ~0xffff0000, 0x0d510000 },
++ { 0xea000810, ~0xffff0000, 0x0d510000 },
++ { 0xea000a10, ~0xffff0000, 0x0d510000 },
++ { 0xea00242c, ~0x00020800, 0x00020000 },
++ { 0xea00262c, ~0x00020800, 0x00020000 },
++ { 0xea00082c, ~0x00020800, 0x00020000 },
++ { 0xea000a2c, ~0x00020800, 0x00020000 },
++ { 0xea002418, ~0xffff0300, 0x38250100 },
++ { 0xea002618, ~0xffff0300, 0x38250100 },
++ { 0xea000818, ~0xffff0300, 0x38250100 },
++ { 0xea000a18, ~0xffff0300, 0x38250100 },
++ { 0xea002400, ~0xcf030000, 0xcf030000 },
++ { 0xea002600, ~0xcf030000, 0xcf030000 },
++ { 0xea000800, ~0xcf030000, 0xcf030000 },
++ { 0xea000a00, ~0xcf030000, 0xcf030000 },
++ { 0xea002428, ~0xff1f0000, 0x580e0000 },
++ { 0xea002628, ~0xff1f0000, 0x580e0000 },
++ { 0xea000828, ~0xff1f0000, 0x580e0000 },
++ { 0xea000a28, ~0xff1f0000, 0x580e0000 },
++ { 0xea00241c, ~0x00007c00, 0x00002400 },
++ { 0xea00261c, ~0x00007c00, 0x00002400 },
++ { 0xea00081c, ~0x00007c00, 0x00002400 },
++ { 0xea000a1c, ~0x00007c00, 0x00002400 },
++ { 0xea00248c, ~0x00ff0000, 0x00800000 },
++ { 0xea00268c, ~0x00ff0000, 0x00800000 },
++ { 0xea00088c, ~0x00ff0000, 0x00800000 },
++ { 0xea000a8c, ~0x00ff0000, 0x00800000 },
++ { 0xea0024a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0026a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0008a4, ~0x0030ff00, 0x00308300 },
++ { 0xea000aa4, ~0x0030ff00, 0x00308300 },
++ { 0xea0024ac, ~0x00000030, 0x00000020 },
++ { 0xea0026ac, ~0x00000030, 0x00000020 },
++ { 0xea0008ac, ~0x00000030, 0x00000020 },
++ { 0xea000aac, ~0x00000030, 0x00000020 },
++ { 0xea002540, ~0x00ffffff, 0x00140718 },
++ { 0xea002740, ~0x00ffffff, 0x00140718 },
++ { 0xea000940, ~0x00ffffff, 0x00140718 },
++ { 0xea000b40, ~0x00ffffff, 0x00140718 },
++ { 0xea002544, ~0x00ffffff, 0x00140998 },
++ { 0xea002744, ~0x00ffffff, 0x00140998 },
++ { 0xea000944, ~0x00ffffff, 0x00140998 },
++ { 0xea000b44, ~0x00ffffff, 0x00140998 },
++ { 0xea002548, ~0x00ffffff, 0x00140998 },
++ { 0xea002748, ~0x00ffffff, 0x00140998 },
++ { 0xea000948, ~0x00ffffff, 0x00140998 },
++ { 0xea000b48, ~0x00ffffff, 0x00140998 },
++ { 0xea00257c, ~0x03000000, 0x03000000 },
++ { 0xea00277c, ~0x03000000, 0x03000000 },
++ { 0xea00097c, ~0x03000000, 0x03000000 },
++ { 0xea000b7c, ~0x03000000, 0x03000000 },
++ { 0xea002578, ~0x00001f00, 0x00001800 },
++ { 0xea002778, ~0x00001f00, 0x00001800 },
++ { 0xea000978, ~0x00001f00, 0x00001800 },
++ { 0xea000b78, ~0x00001f00, 0x00001800 },
++ { 0xea00250c, ~0x0038000f, 0x00000005 },
++ { 0xea00270c, ~0x0038000f, 0x00000005 },
++ { 0xea00090c, ~0x0038000f, 0x00000005 },
++ { 0xea000b0c, ~0x0038000f, 0x00000005 },
++};
++
++const struct hsio_table_row hsio_xhci_shared_lpt_h_cx[] = {
++ { 0xe9002c2c, ~0x00000700, 0x00000100 },
++ { 0xe9002e2c, ~0x00000700, 0x00000100 },
++ { 0xe9002dcc, ~0x00001407, 0x00001407 },
++ { 0xe9002fcc, ~0x00001407, 0x00001407 },
++ { 0xe9002d68, ~0x01000f3c, 0x00000a28 },
++ { 0xe9002f68, ~0x01000f3c, 0x00000a28 },
++ { 0xe9002d6c, ~0x000000ff, 0x0000003f },
++ { 0xe9002f6c, ~0x000000ff, 0x0000003f },
++ { 0xe9002d4c, ~0x00ffff00, 0x00120500 },
++ { 0xe9002f4c, ~0x00ffff00, 0x00120500 },
++ { 0xe9002d14, ~0x38000700, 0x00000100 },
++ { 0xe9002f14, ~0x38000700, 0x00000100 },
++ { 0xe9002d64, ~0x0000f000, 0x00005000 },
++ { 0xe9002f64, ~0x0000f000, 0x00005000 },
++ { 0xe9002d70, ~0x00000018, 0x00000000 },
++ { 0xe9002f70, ~0x00000018, 0x00000000 },
++ { 0xe9002c38, ~0x3f00000f, 0x0700000b },
++ { 0xe9002e38, ~0x3f00000f, 0x0700000b },
++ { 0xe9002d40, ~0x00800000, 0x00000000 },
++ { 0xe9002f40, ~0x00800000, 0x00000000 },
++};
++
++const struct hsio_table_row hsio_xhci_lpt_h_cx[] = {
++ { 0xe90031cc, ~0x00001407, 0x00001407 },
++ { 0xe90033cc, ~0x00001407, 0x00001407 },
++ { 0xe90015cc, ~0x00001407, 0x00001407 },
++ { 0xe90017cc, ~0x00001407, 0x00001407 },
++ { 0xe9003168, ~0x01000f3c, 0x00000a28 },
++ { 0xe9003368, ~0x01000f3c, 0x00000a28 },
++ { 0xe9001568, ~0x01000f3c, 0x00000a28 },
++ { 0xe9001768, ~0x01000f3c, 0x00000a28 },
++ { 0xe900316c, ~0x000000ff, 0x0000003f },
++ { 0xe900336c, ~0x000000ff, 0x0000003f },
++ { 0xe900156c, ~0x000000ff, 0x0000003f },
++ { 0xe900176c, ~0x000000ff, 0x0000003f },
++ { 0xe900314c, ~0x00ffff00, 0x00120500 },
++ { 0xe900334c, ~0x00ffff00, 0x00120500 },
++ { 0xe900154c, ~0x00ffff00, 0x00120500 },
++ { 0xe900174c, ~0x00ffff00, 0x00120500 },
++ { 0xe9003114, ~0x38000700, 0x00000100 },
++ { 0xe9003314, ~0x38000700, 0x00000100 },
++ { 0xe9001514, ~0x38000700, 0x00000100 },
++ { 0xe9001714, ~0x38000700, 0x00000100 },
++ { 0xe9003164, ~0x0000f000, 0x00005000 },
++ { 0xe9003364, ~0x0000f000, 0x00005000 },
++ { 0xe9001564, ~0x0000f000, 0x00005000 },
++ { 0xe9001764, ~0x0000f000, 0x00005000 },
++ { 0xe9003170, ~0x00000018, 0x00000000 },
++ { 0xe9003370, ~0x00000018, 0x00000000 },
++ { 0xe9001570, ~0x00000018, 0x00000000 },
++ { 0xe9001770, ~0x00000018, 0x00000000 },
++ { 0xe9003038, ~0x3f00000f, 0x0700000b },
++ { 0xe9003238, ~0x3f00000f, 0x0700000b },
++ { 0xe9001438, ~0x3f00000f, 0x0700000b },
++ { 0xe9001638, ~0x3f00000f, 0x0700000b },
++ { 0xe9003140, ~0x00800000, 0x00000000 },
++ { 0xe9003340, ~0x00800000, 0x00000000 },
++ { 0xe9001540, ~0x00800000, 0x00000000 },
++ { 0xe9001740, ~0x00800000, 0x00000000 },
++};
++
++void program_hsio_sata_lpt_h_cx(const bool is_mobile)
++{
++ const struct hsio_table_row *pch_hsio_table;
++ size_t len;
++
++ pch_hsio_table = hsio_sata_lpt_h_cx;
++ len = ARRAY_SIZE(hsio_sata_lpt_h_cx);
++ for (size_t i = 0; i < len; i++)
++ hsio_update_row(pch_hsio_table[i]);
++
++ pch_hsio_table = hsio_sata_shared_lpt_h_cx;
++ len = ARRAY_SIZE(hsio_sata_shared_lpt_h_cx);
++ for (size_t i = 0; i < len; i++)
++ hsio_sata_shared_update_row(pch_hsio_table[i]);
++
++ const uint32_t hsio_sata_value = is_mobile ? 0x00004c5a : 0x00003e67;
++
++ hsio_update(0xea002490, ~0x0000ffff, hsio_sata_value);
++ hsio_update(0xea002690, ~0x0000ffff, hsio_sata_value);
++ hsio_update(0xea000890, ~0x0000ffff, hsio_sata_value);
++ hsio_update(0xea000a90, ~0x0000ffff, hsio_sata_value);
++
++ hsio_sata_shared_update(0xea002090, ~0x0000ffff, hsio_sata_value);
++ hsio_sata_shared_update(0xea002290, ~0x0000ffff, hsio_sata_value);
++}
++
++void program_hsio_xhci_lpt_h_cx(void)
++{
++ const struct hsio_table_row *pch_hsio_table;
++ size_t len;
++
++ pch_hsio_table = hsio_xhci_lpt_h_cx;
++ len = ARRAY_SIZE(hsio_xhci_lpt_h_cx);
++
++ for (size_t i = 0; i < len; i++)
++ hsio_update_row(pch_hsio_table[i]);
++
++ pch_hsio_table = hsio_xhci_shared_lpt_h_cx;
++ len = ARRAY_SIZE(hsio_xhci_shared_lpt_h_cx);
++
++ for (size_t i = 0; i < len; i++)
++ hsio_xhci_shared_update_row(pch_hsio_table[i]);
++}
++
++void program_hsio_igbe_lpt_h_cx(void)
++{
++ const uint32_t strpfusecfg1 = pci_read_config32(PCI_DEV(0, 0x1c, 0), 0xfc);
++ if (!(strpfusecfg1 & (1 << 19)))
++ return;
++
++ const uint8_t gbe_port = (strpfusecfg1 >> 16) & 0x7;
++ const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410);
++ if (gbe_port == 0 && ((lane_owner >> 0) & 3) != 1)
++ return;
++
++ if (gbe_port == 1 && ((lane_owner >> 2) & 3) != 1)
++ return;
++
++ const uint32_t gbe_hsio_base = 0xe900 << 16 | (0x2e - 2 * gbe_port) << 8;
++ hsio_update(gbe_hsio_base + 0x08, ~0xf0000100, 0xe0000100);
++}
+diff --git a/src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c b/src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c
+new file mode 100644
+index 0000000000..24679e791a
+--- /dev/null
++++ b/src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c
+@@ -0,0 +1,180 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/pci_ops.h>
++#include <southbridge/intel/lynxpoint/iobp.h>
++#include <southbridge/intel/lynxpoint/hsio/hsio.h>
++#include <types.h>
++
++const struct hsio_table_row hsio_sata_shared_lpt_lp_bx[] = {
++ { 0xea008008, ~0xff000000, 0x1c000000 },
++ { 0xea002008, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002208, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002408, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002608, ~0xfffc6108, 0xea6c6108 },
++ { 0xea002038, ~0x0000000f, 0x0000000d },
++ { 0xea002238, ~0x0000000f, 0x0000000d },
++ { 0xea002438, ~0x0000000f, 0x0000000d },
++ { 0xea002638, ~0x0000000f, 0x0000000d },
++ { 0xea00202c, ~0x00020f00, 0x00020100 },
++ { 0xea00222c, ~0x00020f00, 0x00020100 },
++ { 0xea00242c, ~0x00020f00, 0x00020100 },
++ { 0xea00262c, ~0x00020f00, 0x00020100 },
++ { 0xea002040, ~0x1f000000, 0x01000000 },
++ { 0xea002240, ~0x1f000000, 0x01000000 },
++ { 0xea002440, ~0x1f000000, 0x01000000 },
++ { 0xea002640, ~0x1f000000, 0x01000000 },
++ { 0xea002010, ~0xffff0000, 0x55510000 },
++ { 0xea002210, ~0xffff0000, 0x55510000 },
++ { 0xea002410, ~0xffff0000, 0x55510000 },
++ { 0xea002610, ~0xffff0000, 0x55510000 },
++ { 0xea002140, ~0x00ffffff, 0x00140718 },
++ { 0xea002340, ~0x00ffffff, 0x00140718 },
++ { 0xea002540, ~0x00ffffff, 0x00140718 },
++ { 0xea002740, ~0x00ffffff, 0x00140718 },
++ { 0xea002144, ~0x00ffffff, 0x00140998 },
++ { 0xea002344, ~0x00ffffff, 0x00140998 },
++ { 0xea002544, ~0x00ffffff, 0x00140998 },
++ { 0xea002744, ~0x00ffffff, 0x00140998 },
++ { 0xea002148, ~0x00ffffff, 0x00140998 },
++ { 0xea002348, ~0x00ffffff, 0x00140998 },
++ { 0xea002548, ~0x00ffffff, 0x00140998 },
++ { 0xea002748, ~0x00ffffff, 0x00140998 },
++ { 0xea00217c, ~0x03000000, 0x03000000 },
++ { 0xea00237c, ~0x03000000, 0x03000000 },
++ { 0xea00257c, ~0x03000000, 0x03000000 },
++ { 0xea00277c, ~0x03000000, 0x03000000 },
++ { 0xea00208c, ~0x00ff0000, 0x00800000 },
++ { 0xea00228c, ~0x00ff0000, 0x00800000 },
++ { 0xea00248c, ~0x00ff0000, 0x00800000 },
++ { 0xea00268c, ~0x00ff0000, 0x00800000 },
++ { 0xea0020a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0022a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0024a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0026a4, ~0x0030ff00, 0x00308300 },
++ { 0xea0020ac, ~0x00000030, 0x00000020 },
++ { 0xea0022ac, ~0x00000030, 0x00000020 },
++ { 0xea0024ac, ~0x00000030, 0x00000020 },
++ { 0xea0026ac, ~0x00000030, 0x00000020 },
++ { 0xea002018, ~0xffff0300, 0x38250100 },
++ { 0xea002218, ~0xffff0300, 0x38250100 },
++ { 0xea002418, ~0xffff0300, 0x38250100 },
++ { 0xea002618, ~0xffff0300, 0x38250100 },
++ { 0xea002000, ~0xcf030000, 0xcf030000 },
++ { 0xea002200, ~0xcf030000, 0xcf030000 },
++ { 0xea002400, ~0xcf030000, 0xcf030000 },
++ { 0xea002600, ~0xcf030000, 0xcf030000 },
++ { 0xea002028, ~0xff1f0000, 0x580e0000 },
++ { 0xea002228, ~0xff1f0000, 0x580e0000 },
++ { 0xea002428, ~0xff1f0000, 0x580e0000 },
++ { 0xea002628, ~0xff1f0000, 0x580e0000 },
++ { 0xea00201c, ~0x00007c00, 0x00002400 },
++ { 0xea00221c, ~0x00007c00, 0x00002400 },
++ { 0xea00241c, ~0x00007c00, 0x00002400 },
++ { 0xea00261c, ~0x00007c00, 0x00002400 },
++ { 0xea002178, ~0x00001f00, 0x00001800 },
++ { 0xea002378, ~0x00001f00, 0x00001800 },
++ { 0xea002578, ~0x00001f00, 0x00001800 },
++ { 0xea002778, ~0x00001f00, 0x00001800 },
++ { 0xea00210c, ~0x0038000f, 0x00000005 },
++ { 0xea00230c, ~0x0038000f, 0x00000005 },
++ { 0xea00250c, ~0x0038000f, 0x00000005 },
++ { 0xea00270c, ~0x0038000f, 0x00000005 },
++};
++
++const struct hsio_table_row hsio_xhci_shared_lpt_lp_bx[] = {
++ { 0xe90025cc, ~0x00001407, 0x00001407 },
++ { 0xe90027cc, ~0x00001407, 0x00001407 },
++ { 0xe9002568, ~0x01000f3c, 0x00000a28 },
++ { 0xe9002768, ~0x01000f3c, 0x00000a28 },
++ { 0xe900242c, ~0x00000700, 0x00000100 },
++ { 0xe900262c, ~0x00000700, 0x00000100 },
++ { 0xe900256c, ~0x000000ff, 0x0000003f },
++ { 0xe900276c, ~0x000000ff, 0x0000003f },
++ { 0xe900254c, ~0x00ffff00, 0x00120500 },
++ { 0xe900274c, ~0x00ffff00, 0x00120500 },
++ { 0xe9002564, ~0x0000f000, 0x00005000 },
++ { 0xe9002764, ~0x0000f000, 0x00005000 },
++ { 0xe9002570, ~0x00000018, 0x00000000 },
++ { 0xe9002770, ~0x00000018, 0x00000000 },
++ { 0xe9002514, ~0x38000700, 0x00000100 },
++ { 0xe9002714, ~0x38000700, 0x00000100 },
++ { 0xe9002438, ~0x0000000f, 0x0000000b },
++ { 0xe9002638, ~0x0000000f, 0x0000000b },
++ { 0xe9002414, ~0x0000fe00, 0x00006600 },
++ { 0xe9002614, ~0x0000fe00, 0x00006600 },
++ { 0xe9002540, ~0x00800000, 0x00000000 },
++ { 0xe9002740, ~0x00800000, 0x00000000 },
++};
++
++const struct hsio_table_row hsio_xhci_lpt_lp_bx[] = {
++ { 0xe90021cc, ~0x00001407, 0x00001407 },
++ { 0xe90023cc, ~0x00001407, 0x00001407 },
++ { 0xe9002168, ~0x01000f3c, 0x00000a28 },
++ { 0xe9002368, ~0x01000f3c, 0x00000a28 },
++ { 0xe900216c, ~0x000000ff, 0x0000003f },
++ { 0xe900236c, ~0x000000ff, 0x0000003f },
++ { 0xe900214c, ~0x00ffff00, 0x00120500 },
++ { 0xe900234c, ~0x00ffff00, 0x00120500 },
++ { 0xe9002164, ~0x0000f000, 0x00005000 },
++ { 0xe9002364, ~0x0000f000, 0x00005000 },
++ { 0xe9002170, ~0x00000018, 0x00000000 },
++ { 0xe9002370, ~0x00000018, 0x00000000 },
++ { 0xe9002114, ~0x38000700, 0x00000100 },
++ { 0xe9002314, ~0x38000700, 0x00000100 },
++ { 0xe9002038, ~0x0000000f, 0x0000000b },
++ { 0xe9002238, ~0x0000000f, 0x0000000b },
++ { 0xe9002014, ~0x0000fe00, 0x00006600 },
++ { 0xe9002214, ~0x0000fe00, 0x00006600 },
++ { 0xe9002140, ~0x00800000, 0x00000000 },
++ { 0xe9002340, ~0x00800000, 0x00000000 },
++};
++
++void program_hsio_sata_lpt_lp_bx(const bool is_mobile)
++{
++ const struct hsio_table_row *pch_hsio_table;
++ size_t len;
++
++ pch_hsio_table = hsio_sata_shared_lpt_lp_bx;
++ len = ARRAY_SIZE(hsio_sata_shared_lpt_lp_bx);
++ for (size_t i = 0; i < len; i++)
++ hsio_sata_shared_update_row(pch_hsio_table[i]);
++
++ const uint32_t hsio_sata_value = is_mobile ? 0x00004c5a : 0x00003e67;
++
++ hsio_sata_shared_update(0xea002090, ~0x0000ffff, hsio_sata_value);
++ hsio_sata_shared_update(0xea002290, ~0x0000ffff, hsio_sata_value);
++ hsio_sata_shared_update(0xea002490, ~0x0000ffff, hsio_sata_value);
++ hsio_sata_shared_update(0xea002690, ~0x0000ffff, hsio_sata_value);
++}
++
++void program_hsio_xhci_lpt_lp_bx(void)
++{
++ const struct hsio_table_row *pch_hsio_table;
++ size_t len;
++
++ pch_hsio_table = hsio_xhci_lpt_lp_bx;
++ len = ARRAY_SIZE(hsio_xhci_lpt_lp_bx);
++
++ for (size_t i = 0; i < len; i++)
++ hsio_update_row(pch_hsio_table[i]);
++
++ pch_hsio_table = hsio_xhci_shared_lpt_lp_bx;
++ len = ARRAY_SIZE(hsio_xhci_shared_lpt_lp_bx);
++
++ for (size_t i = 0; i < len; i++)
++ hsio_xhci_shared_update_row(pch_hsio_table[i]);
++}
++
++void program_hsio_igbe_lpt_lp_bx(void)
++{
++ const uint32_t strpfusecfg1 = pci_read_config32(PCI_DEV(0, 0x1c, 0), 0xfc);
++ if (!(strpfusecfg1 & (1 << 19)))
++ return;
++
++ const uint8_t gbe_port = (strpfusecfg1 >> 16) & 0x7;
++ if (gbe_port > 5)
++ return;
++
++ const uint32_t gbe_hsio_base = 0xe900 << 16 | (0x08 + 2 * gbe_port) << 8;
++ hsio_update(gbe_hsio_base + 0x08, ~0xf0000100, 0xe0000100);
++}
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index 38a9349220..74b4d50017 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -117,6 +117,7 @@ void pch_dmi_setup_physical_layer(void);
+ void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm);
+ void early_usb_init(void);
+ void early_thermal_init(void);
++void early_pch_init_native(int s3resume);
+
+ void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ);
+ void usb_ehci_disable(pci_devfn_t dev);
+@@ -271,6 +272,10 @@ void mainboard_config_rcba(void);
+ #define IDE_DECODE_ENABLE (1 << 15)
+ #define IDE_TIM_SEC 0x42 /* IDE timings, secondary */
+
++#define SATA_MAP 0x90
++#define SATA_PCS 0x92
++#define SATA_SCLKG 0x94
++
+ #define SATA_SIRI 0xa0 /* SATA Indexed Register Index */
+ #define SATA_SIRD 0xa4 /* SATA Indexed Register Data */
+ #define SATA_SP 0xd0 /* Scratchpad */
+@@ -580,6 +585,7 @@ void mainboard_config_rcba(void);
+ #define D19IR 0x3168 /* 16bit */
+ #define ACPIIRQEN 0x31e0 /* 32bit */
+ #define OIC 0x31fe /* 16bit */
++#define PRSTS 0x3310 /* 32bit */
+ #define PMSYNC_CONFIG 0x33c4 /* 32bit */
+ #define PMSYNC_CONFIG2 0x33cc /* 32bit */
+ #define SOFT_RESET_CTRL 0x38f4
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch b/config/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch
new file mode 100644
index 00000000..6df828eb
--- /dev/null
+++ b/config/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch
@@ -0,0 +1,407 @@
+From 46cdec8cbce15ca11ad9a49a3ee415a78f781997 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 00:26:10 +0200
+Subject: [PATCH 08/26] nb/intel/haswell: Add native raminit scaffolding
+
+Implement some scaffolding for Haswell native raminit, like bootmode
+selection, handling of MRC cache and CPU detection.
+
+Change-Id: Icd96649fa045ea7f0f32ae9bfe1e60498d93975b
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 104 ++++++++++
+ .../haswell/native_raminit/raminit_native.c | 189 +++++++++++++++++-
+ .../haswell/native_raminit/raminit_native.h | 34 ++++
+ 4 files changed, 322 insertions(+), 6 deletions(-)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/raminit_main.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/raminit_native.h
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 8cfb4fb33e..90af951c5a 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,3 +1,4 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+new file mode 100644
+index 0000000000..9b42c25b40
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -0,0 +1,104 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <console/console.h>
++#include <cpu/intel/haswell/haswell.h>
++#include <delay.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/chip.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/raminit.h>
++#include <string.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++struct task_entry {
++ enum raminit_status (*task)(struct sysinfo *);
++ bool is_enabled;
++ const char *name;
++};
++
++static const struct task_entry cold_boot[] = {
++};
++
++/* Return a generic stepping value to make stepping checks simpler */
++static enum generic_stepping get_stepping(const uint32_t cpuid)
++{
++ switch (cpuid) {
++ case CPUID_HASWELL_A0:
++ die("Haswell stepping A0 is not supported\n");
++ case CPUID_HASWELL_B0:
++ case CPUID_HASWELL_ULT_B0:
++ case CPUID_CRYSTALWELL_B0:
++ return STEPPING_B0;
++ case CPUID_HASWELL_C0:
++ case CPUID_HASWELL_ULT_C0:
++ case CPUID_CRYSTALWELL_C0:
++ return STEPPING_C0;
++ default:
++ /** TODO: Add Broadwell support someday **/
++ die("Unknown CPUID 0x%x\n", cpuid);
++ }
++}
++
++static void initialize_ctrl(struct sysinfo *ctrl)
++{
++ const struct northbridge_intel_haswell_config *cfg = config_of_soc();
++ const enum raminit_boot_mode bootmode = ctrl->bootmode;
++
++ memset(ctrl, 0, sizeof(*ctrl));
++
++ ctrl->cpu = cpu_get_cpuid();
++ ctrl->stepping = get_stepping(ctrl->cpu);
++ ctrl->dq_pins_interleaved = cfg->dq_pins_interleaved;
++ ctrl->bootmode = bootmode;
++}
++
++static enum raminit_status try_raminit(struct sysinfo *ctrl)
++{
++ const struct task_entry *const schedule = cold_boot;
++ const size_t length = ARRAY_SIZE(cold_boot);
++
++ enum raminit_status status = RAMINIT_STATUS_UNSPECIFIED_ERROR;
++
++ for (size_t i = 0; i < length; i++) {
++ const struct task_entry *const entry = &schedule[i];
++ assert(entry);
++ assert(entry->name);
++ if (!entry->is_enabled)
++ continue;
++
++ assert(entry->task);
++ printk(RAM_DEBUG, "\nExecuting raminit task %s\n", entry->name);
++ status = entry->task(ctrl);
++ printk(RAM_DEBUG, "\n");
++ if (status) {
++ printk(BIOS_ERR, "raminit failed on step %s\n", entry->name);
++ break;
++ }
++ }
++
++ return status;
++}
++
++void raminit_main(const enum raminit_boot_mode bootmode)
++{
++ /*
++ * The mighty_ctrl struct. Will happily nuke the pre-RAM stack
++ * if left unattended. Make it static and pass pointers to it.
++ */
++ static struct sysinfo mighty_ctrl;
++
++ mighty_ctrl.bootmode = bootmode;
++ initialize_ctrl(&mighty_ctrl);
++
++ /** TODO: Try more than once **/
++ enum raminit_status status = try_raminit(&mighty_ctrl);
++
++ if (status != RAMINIT_STATUS_SUCCESS)
++ die("Memory initialization was met with utmost failure and misery\n");
++
++ /** TODO: Implement the required magic **/
++ die("NATIVE RAMINIT: More Magic (tm) required.\n");
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index b6efb6b40d..0869db3902 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -1,13 +1,45 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
++#include <arch/cpu.h>
++#include <assert.h>
++#include <cbmem.h>
++#include <cf9_reset.h>
+ #include <console/console.h>
++#include <cpu/x86/msr.h>
+ #include <delay.h>
++#include <device/pci_ops.h>
++#include <mrc_cache.h>
+ #include <northbridge/intel/haswell/haswell.h>
+ #include <northbridge/intel/haswell/raminit.h>
+ #include <southbridge/intel/lynxpoint/me.h>
+ #include <southbridge/intel/lynxpoint/pch.h>
+ #include <types.h>
+
++#include "raminit_native.h"
++
++static void wait_txt_clear(void)
++{
++ const struct cpuid_result cpuid = cpuid_ext(1, 0);
++
++ /* Check if TXT is supported */
++ if (!(cpuid.ecx & BIT(6)))
++ return;
++
++ /* Some TXT public bit */
++ if (!(read32p(0xfed30010) & 1))
++ return;
++
++ /* Wait for TXT clear */
++ do {} while (!(read8p(0xfed40000) & (1 << 7)));
++}
++
++static enum raminit_boot_mode get_boot_mode(void)
++{
++ const uint16_t pmcon_2 = pci_read_config16(PCH_LPC_DEV, GEN_PMCON_2);
++ const uint16_t bitmask = GEN_PMCON_2_DISB | GEN_PMCON_2_MEM_SR;
++ return (pmcon_2 & bitmask) == bitmask ? BOOTMODE_WARM : BOOTMODE_COLD;
++}
++
+ static bool early_init_native(int s3resume)
+ {
+ printk(BIOS_DEBUG, "Starting native platform initialisation\n");
+@@ -24,6 +56,120 @@ static bool early_init_native(int s3resume)
+ return cpu_replaced;
+ }
+
++#define MRC_CACHE_VERSION 1
++
++struct mrc_data {
++ const void *buffer;
++ size_t buffer_len;
++};
++
++static void save_mrc_data(struct mrc_data *md)
++{
++ mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION, md->buffer, md->buffer_len);
++}
++
++static struct mrc_data prepare_mrc_cache(void)
++{
++ struct mrc_data md = {0};
++ md.buffer = mrc_cache_current_mmap_leak(MRC_TRAINING_DATA,
++ MRC_CACHE_VERSION,
++ &md.buffer_len);
++ return md;
++}
++
++static const char *const bm_names[] = {
++ "BOOTMODE_COLD",
++ "BOOTMODE_WARM",
++ "BOOTMODE_S3",
++ "BOOTMODE_FAST",
++};
++
++static void clear_disb(void)
++{
++ pci_and_config16(PCH_LPC_DEV, GEN_PMCON_2, ~GEN_PMCON_2_DISB);
++}
++
++static void raminit_reset(void)
++{
++ clear_disb();
++ system_reset();
++}
++
++static enum raminit_boot_mode do_actual_raminit(
++ struct mrc_data *md,
++ const bool s3resume,
++ const bool cpu_replaced,
++ const enum raminit_boot_mode orig_bootmode)
++{
++ enum raminit_boot_mode bootmode = orig_bootmode;
++
++ bool save_data_valid = md->buffer && md->buffer_len == USHRT_MAX; /** TODO: sizeof() **/
++
++ if (s3resume) {
++ if (bootmode == BOOTMODE_COLD) {
++ printk(BIOS_EMERG, "Memory may not be in self-refresh for S3 resume\n");
++ printk(BIOS_EMERG, "S3 resume and cold boot are mutually exclusive\n");
++ raminit_reset();
++ }
++ /* Only a true mad hatter would replace a CPU in S3 */
++ if (cpu_replaced) {
++ printk(BIOS_EMERG, "Oh no, CPU was replaced during S3\n");
++ /*
++ * No reason to continue, memory consistency is most likely lost
++ * and ME will probably request a reset through DID response too.
++ */
++ /** TODO: Figure out why past self commented this out **/
++ //raminit_reset();
++ }
++ bootmode = BOOTMODE_S3;
++ if (!save_data_valid) {
++ printk(BIOS_EMERG, "No training data, S3 resume is impossible\n");
++ /* Failed S3 resume, reset to come up cleanly */
++ raminit_reset();
++ }
++ }
++ if (!s3resume && cpu_replaced) {
++ printk(BIOS_NOTICE, "CPU was replaced, forcing a cold boot\n");
++ /*
++ * Looks like the ME will get angry if raminit takes too long.
++ * It will report that the CPU has been replaced on next boot.
++ * Try to continue anyway. This should not happen in most cases.
++ */
++ /** TODO: Figure out why past self commented this out **/
++ //save_data_valid = false;
++ }
++ if (bootmode == BOOTMODE_COLD) {
++ /* If possible, promote to a fast boot */
++ if (save_data_valid)
++ bootmode = BOOTMODE_FAST;
++
++ clear_disb();
++ } else if (bootmode == BOOTMODE_WARM) {
++ /* If a warm reset happened before raminit is done, force a cold boot */
++ if (mchbar_read32(SSKPD) == 0 && mchbar_read32(SSKPD + 4) == 0) {
++ printk(BIOS_NOTICE, "Warm reset occurred early in cold boot\n");
++ save_data_valid = false;
++ }
++ if (!save_data_valid)
++ bootmode = BOOTMODE_COLD;
++ }
++ assert(save_data_valid != (bootmode == BOOTMODE_COLD));
++ if (save_data_valid) {
++ printk(BIOS_INFO, "Using cached memory parameters\n");
++ die("RAMINIT: Fast boot is not yet implemented\n");
++ }
++ printk(RAM_DEBUG, "Initial bootmode: %s\n", bm_names[orig_bootmode]);
++ printk(RAM_DEBUG, "Current bootmode: %s\n", bm_names[bootmode]);
++
++ /*
++ * And now, the actual memory initialization thing.
++ */
++ printk(RAM_DEBUG, "\nStarting native raminit\n");
++ raminit_main(bootmode);
++
++ return bootmode;
++}
++
+ void perform_raminit(const int s3resume)
+ {
+ /*
+@@ -32,17 +178,48 @@ void perform_raminit(const int s3resume)
+ */
+ const bool cpu_replaced = early_init_native(s3resume);
+
+- (void)cpu_replaced;
++ wait_txt_clear();
++ wrmsr(0x2e6, (msr_t) {.lo = 0, .hi = 0});
++
++ const enum raminit_boot_mode orig_bootmode = get_boot_mode();
++
++ struct mrc_data md = prepare_mrc_cache();
++
++ const enum raminit_boot_mode bootmode =
++ do_actual_raminit(&md, s3resume, cpu_replaced, orig_bootmode);
++
++ /** TODO: report_memory_config **/
+
+- /** TODO: Move after raminit */
+ if (intel_early_me_uma_size() > 0) {
+- /** TODO: Update status once raminit is implemented **/
+- uint8_t me_status = ME_INIT_STATUS_ERROR;
++ /*
++ * The 'other' success value is to report loss of memory
++ * consistency to ME if warm boot was downgraded to cold.
++ */
++ uint8_t me_status;
++ if (BOOTMODE_WARM == orig_bootmode && BOOTMODE_COLD == bootmode)
++ me_status = ME_INIT_STATUS_SUCCESS_OTHER;
++ else
++ me_status = ME_INIT_STATUS_SUCCESS;
++
++ /** TODO: Remove this once raminit is implemented **/
++ me_status = ME_INIT_STATUS_ERROR;
+ intel_early_me_init_done(me_status);
+ }
+
++ post_code(0x3b);
++
+ intel_early_me_status();
+
+- /** TODO: Implement the required magic **/
+- die("NATIVE RAMINIT: More Magic (tm) required.\n");
++ const bool cbmem_was_initted = !cbmem_recovery(s3resume);
++ if (s3resume && !cbmem_was_initted) {
++ /* Failed S3 resume, reset to come up cleanly */
++ printk(BIOS_CRIT, "Failed to recover CBMEM in S3 resume.\n");
++ system_reset();
++ }
++
++ /* Save training data on non-S3 resumes */
++ if (!s3resume)
++ save_mrc_data(&md);
++
++ /** TODO: setup_sdram_meminfo **/
+ }
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+new file mode 100644
+index 0000000000..885f0184f4
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -0,0 +1,34 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef HASWELL_RAMINIT_NATIVE_H
++#define HASWELL_RAMINIT_NATIVE_H
++
++enum raminit_boot_mode {
++ BOOTMODE_COLD,
++ BOOTMODE_WARM,
++ BOOTMODE_S3,
++ BOOTMODE_FAST,
++};
++
++enum raminit_status {
++ RAMINIT_STATUS_SUCCESS = 0,
++ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
++};
++
++enum generic_stepping {
++ STEPPING_A0 = 1,
++ STEPPING_B0 = 2,
++ STEPPING_C0 = 3,
++};
++
++struct sysinfo {
++ enum raminit_boot_mode bootmode;
++ enum generic_stepping stepping;
++ uint32_t cpu; /* CPUID value */
++
++ bool dq_pins_interleaved;
++};
++
++void raminit_main(enum raminit_boot_mode bootmode);
++
++#endif
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch b/config/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch
new file mode 100644
index 00000000..07525d18
--- /dev/null
+++ b/config/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch
@@ -0,0 +1,57 @@
+From 731216aef3129ae27ad5adc7266cb8a58090c9fc Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 26 Jun 2022 10:32:12 +0200
+Subject: [PATCH 09/26] nb/intel/haswell/nri: Only do CPU replacement check on
+ cold boots
+
+CPU replacement check should only be done on cold boots.
+
+Change-Id: I98efa105f4df755b23febe12dd7b356787847852
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/raminit_native.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index 0869db3902..bd9bc8e692 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -40,15 +40,14 @@ static enum raminit_boot_mode get_boot_mode(void)
+ return (pmcon_2 & bitmask) == bitmask ? BOOTMODE_WARM : BOOTMODE_COLD;
+ }
+
+-static bool early_init_native(int s3resume)
++static bool early_init_native(enum raminit_boot_mode bootmode)
+ {
+ printk(BIOS_DEBUG, "Starting native platform initialisation\n");
+
+ intel_early_me_init();
+- /** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/
+- const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check();
++ bool cpu_replaced = bootmode == BOOTMODE_COLD && intel_early_me_cpu_replacement_check();
+
+- early_pch_init_native(s3resume);
++ early_pch_init_native(bootmode == BOOTMODE_S3);
+
+ if (!CONFIG(INTEL_LYNXPOINT_LP))
+ dmi_early_init();
+@@ -176,13 +175,13 @@ void perform_raminit(const int s3resume)
+ * See, this function's name is a lie. There are more things to
+ * do that memory initialisation, but they are relatively easy.
+ */
+- const bool cpu_replaced = early_init_native(s3resume);
++ const enum raminit_boot_mode orig_bootmode = get_boot_mode();
++
++ const bool cpu_replaced = early_init_native(s3resume ? BOOTMODE_S3 : orig_bootmode);
+
+ wait_txt_clear();
+ wrmsr(0x2e6, (msr_t) {.lo = 0, .hi = 0});
+
+- const enum raminit_boot_mode orig_bootmode = get_boot_mode();
+-
+ struct mrc_data md = prepare_mrc_cache();
+
+ const enum raminit_boot_mode bootmode =
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch b/config/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch
new file mode 100644
index 00000000..4c2a2670
--- /dev/null
+++ b/config/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch
@@ -0,0 +1,344 @@
+From 354969af4361bcc7dc240ef5871d169728f7f0cc Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 13:48:53 +0200
+Subject: [PATCH 10/26] haswell NRI: Collect SPD info
+
+Collect SPD data from DIMMs and memory-down, and find the common
+supported settings.
+
+Change-Id: I4e6a1408a638a463ecae37a447cfed1d6556e44a
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 57 +++++
+ .../haswell/native_raminit/spd_bitmunching.c | 206 ++++++++++++++++++
+ 4 files changed, 265 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 90af951c5a..ebf7abc6ec 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -2,3 +2,4 @@
+
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
++romstage-y += spd_bitmunching.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 9b42c25b40..2d2cfa48bb 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -20,6 +20,7 @@ struct task_entry {
+ };
+
+ static const struct task_entry cold_boot[] = {
++ { collect_spd_info, true, "PROCSPD", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 885f0184f4..1a0793947e 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -3,6 +3,15 @@
+ #ifndef HASWELL_RAMINIT_NATIVE_H
+ #define HASWELL_RAMINIT_NATIVE_H
+
++#include <device/dram/ddr3.h>
++#include <northbridge/intel/haswell/haswell.h>
++
++#define SPD_LEN 256
++
++/* 8 data lanes + 1 ECC lane */
++#define NUM_LANES 9
++#define NUM_LANES_NO_ECC 8
++
+ enum raminit_boot_mode {
+ BOOTMODE_COLD,
+ BOOTMODE_WARM,
+@@ -12,6 +21,8 @@ enum raminit_boot_mode {
+
+ enum raminit_status {
+ RAMINIT_STATUS_SUCCESS = 0,
++ RAMINIT_STATUS_NO_MEMORY_INSTALLED,
++ RAMINIT_STATUS_UNSUPPORTED_MEMORY,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -21,14 +32,60 @@ enum generic_stepping {
+ STEPPING_C0 = 3,
+ };
+
++struct raminit_dimm_info {
++ spd_raw_data raw_spd;
++ struct dimm_attr_ddr3_st data;
++ uint8_t spd_addr;
++ bool valid;
++};
++
+ struct sysinfo {
+ enum raminit_boot_mode bootmode;
+ enum generic_stepping stepping;
+ uint32_t cpu; /* CPUID value */
+
+ bool dq_pins_interleaved;
++
++ /** TODO: ECC support untested **/
++ bool is_ecc;
++
++ /**
++ * FIXME: LPDDR support is incomplete. The largest chunks are missing,
++ * but some LPDDR-specific variations in algorithms have been handled.
++ * LPDDR-specific functions have stubs which will halt upon execution.
++ */
++ bool lpddr;
++
++ struct raminit_dimm_info dimms[NUM_CHANNELS][NUM_SLOTS];
++ union dimm_flags_ddr3_st flags;
++ uint16_t cas_supported;
++
++ /* Except for tCK, everything is eventually stored in DCLKs */
++ uint32_t tCK;
++ uint32_t tAA; /* Also known as tCL */
++ uint32_t tWR;
++ uint32_t tRCD;
++ uint32_t tRRD;
++ uint32_t tRP;
++ uint32_t tRAS;
++ uint32_t tRC;
++ uint32_t tRFC;
++ uint32_t tWTR;
++ uint32_t tRTP;
++ uint32_t tFAW;
++ uint32_t tCWL;
++ uint32_t tCMD;
++
++ uint8_t lanes; /* 8 or 9 */
++ uint8_t chanmap;
++ uint8_t dpc[NUM_CHANNELS]; /* DIMMs per channel */
++ uint8_t rankmap[NUM_CHANNELS];
++ uint8_t rank_mirrored[NUM_CHANNELS];
++ uint32_t channel_size_mb[NUM_CHANNELS];
+ };
+
+ void raminit_main(enum raminit_boot_mode bootmode);
+
++enum raminit_status collect_spd_info(struct sysinfo *ctrl);
++
+ #endif
+diff --git a/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+new file mode 100644
+index 0000000000..dbe02c72d0
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+@@ -0,0 +1,206 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <cbfs.h>
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <device/dram/ddr3.h>
++#include <device/smbus_host.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <northbridge/intel/haswell/raminit.h>
++#include <string.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static const uint8_t *get_spd_data_from_cbfs(struct spd_info *spdi)
++{
++ if (!CONFIG(HAVE_SPD_IN_CBFS))
++ return NULL;
++
++ printk(RAM_DEBUG, "SPD index %u\n", spdi->spd_index);
++
++ size_t spd_file_len;
++ uint8_t *spd_file = cbfs_map("spd.bin", &spd_file_len);
++
++ if (!spd_file) {
++ printk(BIOS_ERR, "SPD data not found in CBFS\n");
++ return NULL;
++ }
++
++ if (spd_file_len < ((spdi->spd_index + 1) * SPD_LEN)) {
++ printk(BIOS_ERR, "SPD index override to 0 - old hardware?\n");
++ spdi->spd_index = 0;
++ }
++
++ if (spd_file_len < SPD_LEN) {
++ printk(BIOS_ERR, "Invalid SPD data in CBFS\n");
++ return NULL;
++ }
++
++ return spd_file + (spdi->spd_index * SPD_LEN);
++}
++
++static void get_spd_for_dimm(struct raminit_dimm_info *const dimm, const uint8_t *cbfs_spd)
++{
++ if (dimm->spd_addr == SPD_MEMORY_DOWN) {
++ if (cbfs_spd) {
++ memcpy(dimm->raw_spd, cbfs_spd, SPD_LEN);
++ dimm->valid = true;
++ printk(RAM_DEBUG, "memory-down\n");
++ return;
++ } else {
++ printk(RAM_DEBUG, "memory-down but no CBFS SPD data, ignoring\n");
++ return;
++ }
++ }
++ printk(RAM_DEBUG, "slotted ");
++ const uint8_t spd_mem_type = smbus_read_byte(dimm->spd_addr, SPD_MEMORY_TYPE);
++ if (spd_mem_type != SPD_MEMORY_TYPE_SDRAM_DDR3) {
++ printk(RAM_DEBUG, "and not DDR3, ignoring\n");
++ return;
++ }
++ printk(RAM_DEBUG, "and DDR3\n");
++ if (i2c_eeprom_read(dimm->spd_addr, 0, SPD_LEN, dimm->raw_spd) != SPD_LEN) {
++ printk(BIOS_WARNING, "I2C block read failed, trying SMBus byte reads\n");
++ for (uint32_t i = 0; i < SPD_LEN; i++)
++ dimm->raw_spd[i] = smbus_read_byte(dimm->spd_addr, i);
++ }
++ dimm->valid = true;
++}
++
++static void get_spd_data(struct sysinfo *ctrl)
++{
++ struct spd_info spdi = {0};
++ mb_get_spd_map(&spdi);
++ const uint8_t *cbfs_spd = get_spd_data_from_cbfs(&spdi);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ struct raminit_dimm_info *const dimm = &ctrl->dimms[channel][slot];
++ dimm->spd_addr = spdi.addresses[channel + channel + slot];
++ if (!dimm->spd_addr)
++ continue;
++
++ printk(RAM_DEBUG, "CH%uS%u is ", channel, slot);
++ get_spd_for_dimm(dimm, cbfs_spd);
++ }
++ }
++}
++
++static void decode_spd(struct raminit_dimm_info *const dimm)
++{
++ /** TODO: Hook up somewhere, and handle lack of XMP data **/
++ const bool enable_xmp = false;
++ memset(&dimm->data, 0, sizeof(dimm->data));
++ if (enable_xmp)
++ spd_xmp_decode_ddr3(&dimm->data, dimm->raw_spd, DDR3_XMP_PROFILE_1);
++ else
++ spd_decode_ddr3(&dimm->data, dimm->raw_spd);
++
++ if (CONFIG(DEBUG_RAM_SETUP))
++ dram_print_spd_ddr3(&dimm->data);
++}
++
++static enum raminit_status find_common_spd_parameters(struct sysinfo *ctrl)
++{
++ ctrl->cas_supported = 0xffff;
++ ctrl->flags.raw = 0xffffffff;
++
++ ctrl->tCK = 0;
++ ctrl->tAA = 0;
++ ctrl->tWR = 0;
++ ctrl->tRCD = 0;
++ ctrl->tRRD = 0;
++ ctrl->tRP = 0;
++ ctrl->tRAS = 0;
++ ctrl->tRC = 0;
++ ctrl->tRFC = 0;
++ ctrl->tWTR = 0;
++ ctrl->tRTP = 0;
++ ctrl->tFAW = 0;
++ ctrl->tCWL = 0;
++ ctrl->tCMD = 0;
++ ctrl->chanmap = 0;
++
++ bool yes_ecc = false;
++ bool not_ecc = false;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ ctrl->dpc[channel] = 0;
++ ctrl->rankmap[channel] = 0;
++ ctrl->rank_mirrored[channel] = 0;
++ ctrl->channel_size_mb[channel] = 0;
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ struct raminit_dimm_info *const dimm = &ctrl->dimms[channel][slot];
++ if (!dimm->valid)
++ continue;
++
++ printk(RAM_DEBUG, "\nCH%uS%u SPD:\n", channel, slot);
++ decode_spd(dimm);
++
++ ctrl->chanmap |= BIT(channel);
++ ctrl->dpc[channel]++;
++ ctrl->channel_size_mb[channel] += dimm->data.size_mb;
++
++ /* The first rank of a populated slot is always present */
++ const uint8_t rank = slot + slot;
++ assert(dimm->data.ranks);
++ ctrl->rankmap[channel] |= (BIT(dimm->data.ranks) - 1) << rank;
++
++ if (dimm->data.flags.pins_mirrored)
++ ctrl->rank_mirrored[channel] |= BIT(rank + 1);
++
++ /* Find common settings */
++ ctrl->cas_supported &= dimm->data.cas_supported;
++ ctrl->flags.raw &= dimm->data.flags.raw;
++ ctrl->tCK = MAX(ctrl->tCK, dimm->data.tCK);
++ ctrl->tAA = MAX(ctrl->tAA, dimm->data.tAA);
++ ctrl->tWR = MAX(ctrl->tWR, dimm->data.tWR);
++ ctrl->tRCD = MAX(ctrl->tRCD, dimm->data.tRCD);
++ ctrl->tRRD = MAX(ctrl->tRRD, dimm->data.tRRD);
++ ctrl->tRP = MAX(ctrl->tRP, dimm->data.tRP);
++ ctrl->tRAS = MAX(ctrl->tRAS, dimm->data.tRAS);
++ ctrl->tRC = MAX(ctrl->tRC, dimm->data.tRC);
++ ctrl->tRFC = MAX(ctrl->tRFC, dimm->data.tRFC);
++ ctrl->tWTR = MAX(ctrl->tWTR, dimm->data.tWTR);
++ ctrl->tRTP = MAX(ctrl->tRTP, dimm->data.tRTP);
++ ctrl->tFAW = MAX(ctrl->tFAW, dimm->data.tFAW);
++ ctrl->tCWL = MAX(ctrl->tCWL, dimm->data.tCWL);
++ ctrl->tCMD = MAX(ctrl->tCMD, dimm->data.tCMD);
++
++ yes_ecc |= dimm->data.flags.is_ecc;
++ not_ecc |= !dimm->data.flags.is_ecc;
++ }
++ }
++
++ if (!ctrl->chanmap) {
++ printk(BIOS_ERR, "No DIMMs were found\n");
++ return RAMINIT_STATUS_NO_MEMORY_INSTALLED;
++ }
++ if (!ctrl->cas_supported) {
++ printk(BIOS_ERR, "Could not resolve common CAS latency\n");
++ return RAMINIT_STATUS_UNSUPPORTED_MEMORY;
++ }
++ /** TODO: Properly handle ECC support and ECC forced **/
++ if (yes_ecc && not_ecc) {
++ /** TODO: Test if the ECC DIMMs can be operated as non-ECC DIMMs **/
++ printk(BIOS_ERR, "Both ECC and non-ECC DIMMs present, this is unsupported\n");
++ return RAMINIT_STATUS_UNSUPPORTED_MEMORY;
++ }
++ if (yes_ecc)
++ ctrl->lanes = NUM_LANES;
++ else
++ ctrl->lanes = NUM_LANES_NO_ECC;
++
++ ctrl->is_ecc = yes_ecc;
++
++ /** TODO: Complete LPDDR support **/
++ ctrl->lpddr = false;
++
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++enum raminit_status collect_spd_info(struct sysinfo *ctrl)
++{
++ get_spd_data(ctrl);
++ return find_common_spd_parameters(ctrl);
++}
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch b/config/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch
new file mode 100644
index 00000000..1fec2e38
--- /dev/null
+++ b/config/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch
@@ -0,0 +1,346 @@
+From 77a89d55ab7a715dc20c34a6edacaaf781b56087 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 14:36:10 +0200
+Subject: [PATCH 11/26] haswell NRI: Initialise MPLL
+
+Add code to initialise the MPLL (Memory PLL). The procedure is similar
+to the one for Sandy/Ivy Bridge, but it is not worth factoring out.
+
+Change-Id: I978c352de68f6d8cecc76f4ae3c12daaf4be9ed6
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 2 +
+ .../intel/haswell/native_raminit/init_mpll.c | 210 ++++++++++++++++++
+ .../haswell/native_raminit/io_comp_control.c | 22 ++
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 11 +
+ .../intel/haswell/registers/mchbar.h | 3 +
+ 6 files changed, 249 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/init_mpll.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index ebf7abc6ec..c125d84f0b 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,5 +1,7 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += init_mpll.c
++romstage-y += io_comp_control.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+ romstage-y += spd_bitmunching.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/init_mpll.c b/src/northbridge/intel/haswell/native_raminit/init_mpll.c
+new file mode 100644
+index 0000000000..2faa183724
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/init_mpll.c
+@@ -0,0 +1,210 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static uint32_t get_mem_multiplier(const struct sysinfo *ctrl)
++{
++ const uint32_t mult = NS2MHZ_DIV256 / (ctrl->tCK * ctrl->base_freq);
++
++ if (ctrl->base_freq == 100)
++ return clamp_u32(7, mult, 12);
++
++ if (ctrl->base_freq == 133)
++ return clamp_u32(3, mult, 10);
++
++ die("Unsupported base frequency\n");
++}
++
++static void normalize_tck(struct sysinfo *ctrl, const bool pll_ref100)
++{
++ /** TODO: Haswell supports up to DDR3-2600 **/
++ if (ctrl->tCK <= TCK_1200MHZ) {
++ ctrl->tCK = TCK_1200MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 1200;
++
++ } else if (ctrl->tCK <= TCK_1100MHZ) {
++ ctrl->tCK = TCK_1100MHZ;
++ ctrl->base_freq = 100;
++ ctrl->mem_clock_mhz = 1100;
++
++ } else if (ctrl->tCK <= TCK_1066MHZ) {
++ ctrl->tCK = TCK_1066MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 1066;
++
++ } else if (ctrl->tCK <= TCK_1000MHZ) {
++ ctrl->tCK = TCK_1000MHZ;
++ ctrl->base_freq = 100;
++ ctrl->mem_clock_mhz = 1000;
++
++ } else if (ctrl->tCK <= TCK_933MHZ) {
++ ctrl->tCK = TCK_933MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 933;
++
++ } else if (ctrl->tCK <= TCK_900MHZ) {
++ ctrl->tCK = TCK_900MHZ;
++ ctrl->base_freq = 100;
++ ctrl->mem_clock_mhz = 900;
++
++ } else if (ctrl->tCK <= TCK_800MHZ) {
++ ctrl->tCK = TCK_800MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 800;
++
++ } else if (ctrl->tCK <= TCK_700MHZ) {
++ ctrl->tCK = TCK_700MHZ;
++ ctrl->base_freq = 100;
++ ctrl->mem_clock_mhz = 700;
++
++ } else if (ctrl->tCK <= TCK_666MHZ) {
++ ctrl->tCK = TCK_666MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 666;
++
++ } else if (ctrl->tCK <= TCK_533MHZ) {
++ ctrl->tCK = TCK_533MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 533;
++
++ } else if (ctrl->tCK <= TCK_400MHZ) {
++ ctrl->tCK = TCK_400MHZ;
++ ctrl->base_freq = 133;
++ ctrl->mem_clock_mhz = 400;
++
++ } else {
++ ctrl->tCK = 0;
++ ctrl->base_freq = 1;
++ ctrl->mem_clock_mhz = 0;
++ return;
++ }
++ if (!pll_ref100 && ctrl->base_freq == 100) {
++ /* Skip unsupported frequency */
++ ctrl->tCK++;
++ normalize_tck(ctrl, pll_ref100);
++ }
++}
++
++#define MIN_CAS 4
++#define MAX_CAS 24
++
++static uint8_t find_compatible_cas(struct sysinfo *ctrl)
++{
++ printk(RAM_DEBUG, "With tCK %u, try CAS: ", ctrl->tCK);
++ const uint8_t cas_lower = MAX(MIN_CAS, DIV_ROUND_UP(ctrl->tAA, ctrl->tCK));
++ const uint8_t cas_upper = MIN(MAX_CAS, 19); /* JEDEC MR0 limit */
++
++ if (!(ctrl->cas_supported >> (cas_lower - MIN_CAS))) {
++ printk(RAM_DEBUG, "DIMMs do not support CAS >= %u\n", cas_lower);
++ ctrl->tCK++;
++ return 0;
++ }
++ for (uint8_t cas = cas_lower; cas <= cas_upper; cas++) {
++ printk(RAM_DEBUG, "%u ", cas);
++ if (ctrl->cas_supported & BIT(cas - MIN_CAS)) {
++ printk(RAM_DEBUG, "OK\n");
++ return cas;
++ }
++ }
++ return 0;
++}
++
++static enum raminit_status find_cas_tck(struct sysinfo *ctrl)
++{
++ /** TODO: Honor all possible PLL_REF100_CFG values **/
++ uint8_t pll_ref100 = (pci_read_config32(HOST_BRIDGE, CAPID0_B) >> 21) & 0x7;
++ printk(RAM_DEBUG, "PLL_REF100_CFG value: 0x%x\n", pll_ref100);
++ printk(RAM_DEBUG, "100MHz reference clock support: %s\n", pll_ref100 ? "yes" : "no");
++
++ uint8_t selected_cas;
++ while (true) {
++ /* Round tCK up so that it is a multiple of either 133 or 100 MHz */
++ normalize_tck(ctrl, pll_ref100);
++ if (!ctrl->tCK) {
++ printk(BIOS_ERR, "Couldn't find compatible clock / CAS settings\n");
++ return RAMINIT_STATUS_MPLL_INIT_FAILURE;
++ }
++ selected_cas = find_compatible_cas(ctrl);
++ if (selected_cas)
++ break;
++
++ ctrl->tCK++;
++ }
++ printk(BIOS_DEBUG, "Found compatible clock / CAS settings\n");
++ printk(BIOS_DEBUG, "Selected DRAM frequency: %u MHz\n", NS2MHZ_DIV256 / ctrl->tCK);
++ printk(BIOS_DEBUG, "Selected CAS latency : %uT\n", selected_cas);
++ ctrl->multiplier = get_mem_multiplier(ctrl);
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++enum raminit_status initialise_mpll(struct sysinfo *ctrl)
++{
++ if (ctrl->tCK > TCK_400MHZ) {
++ printk(BIOS_ERR, "tCK is too slow. Increasing to 400 MHz as last resort\n");
++ ctrl->tCK = TCK_400MHZ;
++ }
++ while (true) {
++ if (!ctrl->qclkps) {
++ const enum raminit_status status = find_cas_tck(ctrl);
++ if (status)
++ return status;
++ }
++
++ /*
++ * Unlike previous generations, Haswell's MPLL won't shut down if the
++ * requested frequency isn't supported. But we cannot reinitialize it.
++ * Another different thing: MPLL registers are 4-bit instead of 8-bit.
++ */
++
++ /** FIXME: Obtain current clock frequency if we want to skip this **/
++ //if (mchbar_read32(MC_BIOS_DATA) != 0)
++ // break;
++
++ uint32_t mc_bios_req = ctrl->multiplier;
++ if (ctrl->base_freq == 100) {
++ /* Use 100 MHz reference clock */
++ mc_bios_req |= BIT(4);
++ }
++ mc_bios_req |= BIT(31);
++ printk(RAM_DEBUG, "MC_BIOS_REQ = 0x%08x\n", mc_bios_req);
++ printk(BIOS_DEBUG, "MPLL busy... ");
++ mchbar_write32(MC_BIOS_REQ, mc_bios_req);
++
++ for (unsigned int i = 0; i <= 5000; i++) {
++ if (!(mchbar_read32(MC_BIOS_REQ) & BIT(31))) {
++ printk(BIOS_DEBUG, "done in %u us\n", i);
++ break;
++ }
++ udelay(1);
++ }
++ if (mchbar_read32(MC_BIOS_REQ) & BIT(31))
++ printk(BIOS_DEBUG, "did not lock\n");
++
++ /* Verify locked frequency */
++ const uint32_t mc_bios_data = mchbar_read32(MC_BIOS_DATA);
++ printk(RAM_DEBUG, "MC_BIOS_DATA = 0x%08x\n", mc_bios_data);
++ if ((mc_bios_data & 0xf) >= ctrl->multiplier)
++ break;
++
++ printk(BIOS_DEBUG, "Retrying at a lower frequency\n\n");
++ ctrl->tCK++;
++ }
++ if (!ctrl->mem_clock_mhz) {
++ printk(BIOS_ERR, "Could not program MPLL frequency\n");
++ return RAMINIT_STATUS_MPLL_INIT_FAILURE;
++ }
++ printk(BIOS_DEBUG, "MPLL frequency is set to: %u MHz ", ctrl->mem_clock_mhz);
++ ctrl->mem_clock_fs = 1000000000 / ctrl->mem_clock_mhz;
++ printk(BIOS_DEBUG, "(period: %u femtoseconds)\n", ctrl->mem_clock_fs);
++ ctrl->qclkps = ctrl->mem_clock_fs / 2000;
++ printk(BIOS_DEBUG, "Quadrature clock period: %u picoseconds\n", ctrl->qclkps);
++ return wait_for_first_rcomp();
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/io_comp_control.c b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+new file mode 100644
+index 0000000000..7e96c08938
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+@@ -0,0 +1,22 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++enum raminit_status wait_for_first_rcomp(void)
++{
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 2000);
++ do {
++ if (mchbar_read32(RCOMP_TIMER) & BIT(16))
++ return RAMINIT_STATUS_SUCCESS;
++
++ } while (!stopwatch_expired(&timer));
++ printk(BIOS_ERR, "Timed out waiting for RCOMP to complete\n");
++ return RAMINIT_STATUS_POLL_TIMEOUT;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 2d2cfa48bb..09545422c0 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -21,6 +21,7 @@ struct task_entry {
+
+ static const struct task_entry cold_boot[] = {
+ { collect_spd_info, true, "PROCSPD", },
++ { initialise_mpll, true, "INITMPLL", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 1a0793947e..a54581abc7 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -23,6 +23,8 @@ enum raminit_status {
+ RAMINIT_STATUS_SUCCESS = 0,
+ RAMINIT_STATUS_NO_MEMORY_INSTALLED,
+ RAMINIT_STATUS_UNSUPPORTED_MEMORY,
++ RAMINIT_STATUS_MPLL_INIT_FAILURE,
++ RAMINIT_STATUS_POLL_TIMEOUT,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -82,10 +84,19 @@ struct sysinfo {
+ uint8_t rankmap[NUM_CHANNELS];
+ uint8_t rank_mirrored[NUM_CHANNELS];
+ uint32_t channel_size_mb[NUM_CHANNELS];
++
++ uint8_t base_freq; /* Memory base frequency, either 100 or 133 MHz */
++ uint32_t multiplier;
++ uint32_t mem_clock_mhz;
++ uint32_t mem_clock_fs; /* Memory clock period in femtoseconds */
++ uint32_t qclkps; /* Quadrature clock period in picoseconds */
+ };
+
+ void raminit_main(enum raminit_boot_mode bootmode);
+
+ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
++enum raminit_status initialise_mpll(struct sysinfo *ctrl);
++
++enum raminit_status wait_for_first_rcomp(void);
+
+ #endif
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 5610e7089a..45f8174995 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -13,6 +13,8 @@
+ #define MC_INIT_STATE_G 0x5030
+ #define MRC_REVISION 0x5034 /* MRC Revision */
+
++#define RCOMP_TIMER 0x5084
++
+ #define MC_LOCK 0x50fc /* Memory Controller Lock register */
+
+ #define GFXVTBAR 0x5400 /* Base address for IGD */
+@@ -61,6 +63,7 @@
+
+ #define BIOS_RESET_CPL 0x5da8 /* 8-bit */
+
++#define MC_BIOS_REQ 0x5e00 /* Memory frequency request register */
+ #define MC_BIOS_DATA 0x5e04 /* Miscellaneous information for BIOS */
+ #define SAPMCTL 0x5f00
+
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch b/config/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch
new file mode 100644
index 00000000..e38f8e57
--- /dev/null
+++ b/config/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch
@@ -0,0 +1,249 @@
+From faabed9ca8974b2e7192c55b59a9d28d75e72df6 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 16:29:55 +0200
+Subject: [PATCH 12/26] haswell NRI: Post-process selected timings
+
+Once the MPLL has been initialised, convert the timings from the SPD to
+be in DCLKs, which is what the hardware expects. In addition, calculate
+the values for tREFI and tXP.
+
+Change-Id: Id02caf858f75b9e08016762b3aefda282b274386
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/lookup_timings.c | 62 +++++++++++
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 8 ++
+ .../haswell/native_raminit/spd_bitmunching.c | 100 ++++++++++++++++++
+ 5 files changed, 172 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index c125d84f0b..2769e0bbb4 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,5 +1,6 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += lookup_timings.c
+ romstage-y += init_mpll.c
+ romstage-y += io_comp_control.c
+ romstage-y += raminit_main.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+new file mode 100644
+index 0000000000..038686c844
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+@@ -0,0 +1,62 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++struct timing_lookup {
++ uint32_t clock;
++ uint32_t value;
++};
++
++static uint32_t lookup_timing(
++ const uint32_t mem_clock_mhz,
++ const struct timing_lookup *const lookup,
++ const size_t length)
++{
++ /* Fall back to the last index */
++ size_t i;
++ for (i = 0; i < length - 1; i++) {
++ /* Account for imprecise frequency values */
++ if ((mem_clock_mhz - 5) <= lookup[i].clock)
++ break;
++ }
++ return lookup[i].value;
++}
++
++static const uint32_t fmax = UINT32_MAX;
++
++uint8_t get_tCWL(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 400, 5 },
++ { 533, 6 },
++ { 666, 7 },
++ { 800, 8 },
++ { 933, 9 },
++ { 1066, 10 },
++ { 1200, 11 },
++ { fmax, 12 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++/* tREFI = 7800 ns * DDR MHz */
++uint32_t get_tREFI(const uint32_t mem_clock_mhz)
++{
++ return (mem_clock_mhz * 7800) / 1000;
++}
++
++uint32_t get_tXP(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 400, 3 },
++ { 666, 4 },
++ { 800, 5 },
++ { 933, 6 },
++ { 1066, 7 },
++ { fmax, 8 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 09545422c0..5f2be980d4 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -22,6 +22,7 @@ struct task_entry {
+ static const struct task_entry cold_boot[] = {
+ { collect_spd_info, true, "PROCSPD", },
+ { initialise_mpll, true, "INITMPLL", },
++ { convert_timings, true, "CONVTIM", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index a54581abc7..01e5ed1bd6 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -78,6 +78,9 @@ struct sysinfo {
+ uint32_t tCWL;
+ uint32_t tCMD;
+
++ uint32_t tREFI;
++ uint32_t tXP;
++
+ uint8_t lanes; /* 8 or 9 */
+ uint8_t chanmap;
+ uint8_t dpc[NUM_CHANNELS]; /* DIMMs per channel */
+@@ -96,7 +99,12 @@ void raminit_main(enum raminit_boot_mode bootmode);
+
+ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
+ enum raminit_status initialise_mpll(struct sysinfo *ctrl);
++enum raminit_status convert_timings(struct sysinfo *ctrl);
+
+ enum raminit_status wait_for_first_rcomp(void);
+
++uint8_t get_tCWL(uint32_t mem_clock_mhz);
++uint32_t get_tREFI(uint32_t mem_clock_mhz);
++uint32_t get_tXP(uint32_t mem_clock_mhz);
++
+ #endif
+diff --git a/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+index dbe02c72d0..becbea0725 100644
+--- a/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
++++ b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c
+@@ -204,3 +204,103 @@ enum raminit_status collect_spd_info(struct sysinfo *ctrl)
+ get_spd_data(ctrl);
+ return find_common_spd_parameters(ctrl);
+ }
++
++#define MIN_CWL 5
++#define MAX_CWL 12
++
++/* Except for tCK, hardware expects all timing values in DCLKs, not nanoseconds */
++enum raminit_status convert_timings(struct sysinfo *ctrl)
++{
++ /*
++ * Obtain all required timing values, in DCLKs.
++ */
++
++ /* Convert primary timings from nanoseconds to DCLKs */
++ ctrl->tAA = DIV_ROUND_UP(ctrl->tAA, ctrl->tCK);
++ ctrl->tWR = DIV_ROUND_UP(ctrl->tWR, ctrl->tCK);
++ ctrl->tRCD = DIV_ROUND_UP(ctrl->tRCD, ctrl->tCK);
++ ctrl->tRRD = DIV_ROUND_UP(ctrl->tRRD, ctrl->tCK);
++ ctrl->tRP = DIV_ROUND_UP(ctrl->tRP, ctrl->tCK);
++ ctrl->tRAS = DIV_ROUND_UP(ctrl->tRAS, ctrl->tCK);
++ ctrl->tRC = DIV_ROUND_UP(ctrl->tRC, ctrl->tCK);
++ ctrl->tRFC = DIV_ROUND_UP(ctrl->tRFC, ctrl->tCK);
++ ctrl->tWTR = DIV_ROUND_UP(ctrl->tWTR, ctrl->tCK);
++ ctrl->tRTP = DIV_ROUND_UP(ctrl->tRTP, ctrl->tCK);
++ ctrl->tFAW = DIV_ROUND_UP(ctrl->tFAW, ctrl->tCK);
++ ctrl->tCWL = DIV_ROUND_UP(ctrl->tCWL, ctrl->tCK);
++ ctrl->tCMD = DIV_ROUND_UP(ctrl->tCMD, ctrl->tCK);
++
++ /* Constrain primary timings to hardware limits */
++ /** TODO: complain when clamping? **/
++ ctrl->tAA = clamp_u32(4, ctrl->tAA, 24);
++ ctrl->tWR = clamp_u32(5, ctrl->tWR, 16);
++ ctrl->tRCD = clamp_u32(4, ctrl->tRCD, 20);
++ ctrl->tRRD = clamp_u32(4, ctrl->tRRD, 65535);
++ ctrl->tRP = clamp_u32(4, ctrl->tRP, 15);
++ ctrl->tRAS = clamp_u32(10, ctrl->tRAS, 40);
++ ctrl->tRC = clamp_u32(1, ctrl->tRC, 4095);
++ ctrl->tRFC = clamp_u32(1, ctrl->tRFC, 511);
++ ctrl->tWTR = clamp_u32(4, ctrl->tWTR, 10);
++ ctrl->tRTP = clamp_u32(4, ctrl->tRTP, 15);
++ ctrl->tFAW = clamp_u32(10, ctrl->tFAW, 54);
++
++ /** TODO: Honor tREFI from XMP **/
++ ctrl->tREFI = get_tREFI(ctrl->mem_clock_mhz);
++ ctrl->tXP = get_tXP(ctrl->mem_clock_mhz);
++
++ /*
++ * Check some values, and adjust them if necessary.
++ */
++
++ /* If tWR cannot be written into DDR3 MR0, adjust it */
++ switch (ctrl->tWR) {
++ case 9:
++ case 11:
++ case 13:
++ case 15:
++ ctrl->tWR++;
++ }
++
++ /* If tCWL is not supported or unspecified, look up a reasonable default */
++ if (ctrl->tCWL < MIN_CWL || ctrl->tCWL > MAX_CWL)
++ ctrl->tCWL = get_tCWL(ctrl->mem_clock_mhz);
++
++ /* This is needed to support ODT properly on 2DPC */
++ if (ctrl->tAA - ctrl->tCWL > 4)
++ ctrl->tCWL = ctrl->tAA - 4;
++
++ /* If tCMD is invalid, use a guesstimate default */
++ if (!ctrl->tCMD) {
++ ctrl->tCMD = MAX(ctrl->dpc[0], ctrl->dpc[1]);
++ printk(RAM_DEBUG, "tCMD was zero, picking a guesstimate value\n");
++ }
++ ctrl->tCMD = clamp_u32(1, ctrl->tCMD, 3);
++
++ /*
++ * Print final timings.
++ */
++
++ /* tCK is special */
++ printk(BIOS_DEBUG, "Selected tCK : %u ns\n", ctrl->tCK / 256);
++
++ /* Primary timings */
++ printk(BIOS_DEBUG, "Selected tAA : %uT\n", ctrl->tAA);
++ printk(BIOS_DEBUG, "Selected tWR : %uT\n", ctrl->tWR);
++ printk(BIOS_DEBUG, "Selected tRCD : %uT\n", ctrl->tRCD);
++ printk(BIOS_DEBUG, "Selected tRRD : %uT\n", ctrl->tRRD);
++ printk(BIOS_DEBUG, "Selected tRP : %uT\n", ctrl->tRP);
++ printk(BIOS_DEBUG, "Selected tRAS : %uT\n", ctrl->tRAS);
++ printk(BIOS_DEBUG, "Selected tRC : %uT\n", ctrl->tRC);
++ printk(BIOS_DEBUG, "Selected tRFC : %uT\n", ctrl->tRFC);
++ printk(BIOS_DEBUG, "Selected tWTR : %uT\n", ctrl->tWTR);
++ printk(BIOS_DEBUG, "Selected tRTP : %uT\n", ctrl->tRTP);
++ printk(BIOS_DEBUG, "Selected tFAW : %uT\n", ctrl->tFAW);
++ printk(BIOS_DEBUG, "Selected tCWL : %uT\n", ctrl->tCWL);
++ printk(BIOS_DEBUG, "Selected tCMD : %uT\n", ctrl->tCMD);
++
++ /* Derived timings */
++ printk(BIOS_DEBUG, "Selected tREFI : %uT\n", ctrl->tREFI);
++ printk(BIOS_DEBUG, "Selected tXP : %uT\n", ctrl->tXP);
++
++ return RAMINIT_STATUS_SUCCESS;
++}
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch b/config/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch
new file mode 100644
index 00000000..b1c33328
--- /dev/null
+++ b/config/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch
@@ -0,0 +1,1593 @@
+From 1b0b17d85256193de825fa7ff0e04767c818f2fc Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 17:22:07 +0200
+Subject: [PATCH 13/26] haswell NRI: Configure initial MC settings
+
+Program initial memory controller settings. Many of these values will be
+adjusted later during training.
+
+Change-Id: If33846b51cb1bab5d0458fe626e13afb1bdc900e
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 2 +
+ .../haswell/native_raminit/configure_mc.c | 822 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_main.c | 2 +
+ .../haswell/native_raminit/raminit_native.h | 101 +++
+ .../haswell/native_raminit/reg_structs.h | 405 +++++++++
+ .../haswell/native_raminit/timings_refresh.c | 13 +
+ .../intel/haswell/registers/mchbar.h | 94 ++
+ 7 files changed, 1439 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/configure_mc.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/reg_structs.h
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 2769e0bbb4..fc55277a65 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,8 +1,10 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += configure_mc.c
+ romstage-y += lookup_timings.c
+ romstage-y += init_mpll.c
+ romstage-y += io_comp_control.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+ romstage-y += spd_bitmunching.c
++romstage-y += timings_refresh.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/configure_mc.c b/src/northbridge/intel/haswell/native_raminit/configure_mc.c
+new file mode 100644
+index 0000000000..2a667b075b
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/configure_mc.c
+@@ -0,0 +1,822 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <lib.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <string.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static void program_misc_control(struct sysinfo *ctrl)
++{
++ if (!is_hsw_ult())
++ return;
++
++ const union ddr_scram_misc_control_reg ddr_scram_misc_ctrl = {
++ .ddr_no_ch_interleave = !ctrl->dq_pins_interleaved,
++ .lpddr_mode = ctrl->lpddr,
++ .cke_mapping_ch0 = ctrl->lpddr ? ctrl->lpddr_cke_rank_map[0] : 0,
++ .cke_mapping_ch1 = ctrl->lpddr ? ctrl->lpddr_cke_rank_map[1] : 0,
++ };
++ mchbar_write32(DDR_SCRAM_MISC_CONTROL, ddr_scram_misc_ctrl.raw);
++}
++
++static void program_mrc_revision(void)
++{
++ mchbar_write32(MRC_REVISION, 0x01090000); /* MRC 1.9.0 Build 0 */
++}
++
++static void program_ranks_used(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ mchbar_write8(MC_INIT_STATE_ch(channel), ctrl->rankmap[channel]);
++ if (!does_ch_exist(ctrl, channel)) {
++ mchbar_write32(DDR_CLK_ch_RANKS_USED(channel), 0);
++ mchbar_write32(DDR_CTL_ch_CTL_RANKS_USED(channel), 0);
++ mchbar_write32(DDR_CKE_ch_CTL_RANKS_USED(channel), 0);
++ continue;
++ }
++ uint32_t clk_ranks_used = ctrl->rankmap[channel];
++ if (ctrl->lpddr) {
++ /* With LPDDR, the clock usage goes by group instead */
++ clk_ranks_used = 0;
++ for (uint8_t group = 0; group < NUM_GROUPS; group++) {
++ if (ctrl->dq_byte_map[channel][CT_ITERATION_CLOCK][group])
++ clk_ranks_used |= BIT(group);
++ }
++ }
++ mchbar_write32(DDR_CLK_ch_RANKS_USED(channel), clk_ranks_used);
++
++ uint32_t ctl_ranks_used = ctrl->rankmap[channel];
++ if (is_hsw_ult()) {
++ /* Set ODT disable bits */
++ /** TODO: May need to do this after JEDEC reset/init **/
++ if (ctrl->lpddr && ctrl->lpddr_dram_odt)
++ ctl_ranks_used |= 2 << 4; /* ODT is used on rank 0 */
++ else
++ ctl_ranks_used |= 3 << 4;
++ }
++ mchbar_write32(DDR_CTL_ch_CTL_RANKS_USED(channel), ctl_ranks_used);
++
++ uint32_t cke_ranks_used = ctrl->rankmap[channel];
++ if (ctrl->lpddr) {
++ /* Use CKE-to-rank mapping for LPDDR */
++ const uint8_t cke_rank_map = ctrl->lpddr_cke_rank_map[channel];
++ cke_ranks_used = 0;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ /* ULT only has 2 ranks per channel */
++ if (rank >= 2)
++ break;
++
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t cke = 0; cke < 4; cke++) {
++ if (rank == ((cke_rank_map >> cke) & 1))
++ cke_ranks_used |= BIT(cke);
++ }
++ }
++ }
++ mchbar_write32(DDR_CKE_ch_CTL_RANKS_USED(channel), cke_ranks_used);
++ }
++}
++
++static const uint8_t rxb_trad[2][5][4] = {
++ { /* Vdd low */
++ /* 1067 MHz, 1333 MHz, 1600 MHz, 1867 MHz, 2133 MHz, */
++ {4, 3, 3, 2}, {4, 4, 3, 2}, {5, 4, 3, 3}, {5, 4, 4, 3}, {5, 4, 4, 3},
++ },
++ { /* Vdd hi */
++ /* 1067 MHz, 1333 MHz, 1600 MHz, 1867 MHz, 2133 MHz, */
++ {4, 3, 3, 2}, {4, 4, 3, 2}, {5, 4, 3, 3}, {5, 4, 4, 3}, {4, 4, 3, 3},
++ },
++};
++
++static const uint8_t rxb_ultx[2][3][4] = {
++ { /* Vdd low */
++ /* 1067 MHz, 1333 MHz, 1600 MHz, */
++ {5, 6, 6, 5}, {5, 6, 6, 5}, {4, 6, 6, 6},
++ },
++ { /* Vdd hi */
++ /* 1067 MHz, 1333 MHz, 1600 MHz, */
++ {7, 6, 6, 5}, {7, 6, 6, 5}, {7, 6, 6, 6},
++ },
++};
++
++uint8_t get_rx_bias(const struct sysinfo *ctrl)
++{
++ const bool is_ult = is_hsw_ult();
++ const bool vddhi = ctrl->vdd_mv > 1350;
++ const uint8_t max_rxf = is_ult ? ARRAY_SIZE(rxb_ultx[0]) : ARRAY_SIZE(rxb_trad[0]);
++ const uint8_t ref_clk = ctrl->base_freq == 133 ? 4 : 6;
++ const uint8_t rx_f = clamp_s8(0, ctrl->multiplier - ref_clk, max_rxf - 1);
++ const uint8_t rx_cb = mchbar_read32(DDR_CLK_CB_STATUS) & 0x3;
++ if (is_ult)
++ return rxb_ultx[vddhi][rx_f][rx_cb];
++ else
++ return rxb_trad[vddhi][rx_f][rx_cb];
++}
++
++static void program_ddr_data(struct sysinfo *ctrl, const bool dis_odt_static, const bool vddhi)
++{
++ const bool is_ult = is_hsw_ult();
++
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!does_rank_exist(ctrl, rank))
++ continue;
++
++ const union ddr_data_rx_train_rank_reg rx_train = {
++ .rcven = 64,
++ .dqs_p = 32,
++ .dqs_n = 32,
++ };
++ mchbar_write32(DDR_DATA_RX_TRAIN_RANK(rank), rx_train.raw);
++ mchbar_write32(DDR_DATA_RX_PER_BIT_RANK(rank), 0x88888888);
++
++ const union ddr_data_tx_train_rank_reg tx_train = {
++ .tx_eq = TXEQFULLDRV | 11,
++ .dq_delay = 96,
++ .dqs_delay = 64,
++ };
++ mchbar_write32(DDR_DATA_TX_TRAIN_RANK(rank), tx_train.raw);
++ mchbar_write32(DDR_DATA_TX_PER_BIT_RANK(rank), 0x88888888);
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ ctrl->tx_dq[channel][rank][byte] = tx_train.dq_delay;
++ ctrl->txdqs[channel][rank][byte] = tx_train.dqs_delay;
++ ctrl->tx_eq[channel][rank][byte] = tx_train.tx_eq;
++
++ ctrl->rcven[channel][rank][byte] = rx_train.rcven;
++ ctrl->rxdqsp[channel][rank][byte] = rx_train.dqs_p;
++ ctrl->rxdqsn[channel][rank][byte] = rx_train.dqs_n;
++ ctrl->rx_eq[channel][rank][byte] = rx_train.rx_eq;
++ }
++ }
++ }
++ mchbar_write32(DDR_DATA_TX_XTALK, 0);
++ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0x88888888);
++ mchbar_write32(DDR_DATA_OFFSET_TRAIN, 0);
++ mchbar_write32(DDR_DATA_OFFSET_COMP, 0);
++
++ const union ddr_data_control_0_reg data_control_0 = {
++ .internal_clocks_on = !is_ult,
++ .data_vccddq_hi = vddhi,
++ .disable_odt_static = dis_odt_static,
++ .lpddr_mode = ctrl->lpddr,
++ .odt_samp_extend_en = ctrl->lpddr,
++ .early_rleak_en = ctrl->lpddr && ctrl->stepping >= STEPPING_C0,
++ };
++ mchbar_write32(DDR_DATA_CONTROL_0, data_control_0.raw);
++
++ const union ddr_data_control_1_reg data_control_1 = {
++ .dll_mask = 1,
++ .rx_bias_ctl = get_rx_bias(ctrl),
++ .odt_delay = -2,
++ .odt_duration = 7,
++ .sense_amp_delay = -2,
++ .sense_amp_duration = 7,
++ };
++ mchbar_write32(DDR_DATA_CONTROL_1, data_control_1.raw);
++
++ clear_data_offset_train_all(ctrl);
++
++ /* Stagger byte turn-on to reduce dI/dT */
++ const uint8_t byte_stagger[] = { 0, 4, 1, 5, 2, 6, 3, 7, 8 };
++ const uint8_t latency = 2 * ctrl->tAA - 6;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ union ddr_data_control_2_reg data_control_2 = {
++ .raw = 0,
++ };
++ if (is_ult) {
++ data_control_2.rx_dqs_amp_offset = 8;
++ data_control_2.rx_clk_stg_num = 0x1f;
++ data_control_2.leaker_comp = ctrl->lpddr ? 3 : 0;
++ }
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const uint8_t stg = latency * byte_stagger[byte] / ctrl->lanes;
++ data_control_2.rx_stagger_ctl = stg & 0x1f;
++ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
++ ctrl->data_offset_comp[channel][byte] = 0;
++ ctrl->dq_control_1[channel][byte] = data_control_1.raw;
++ ctrl->dq_control_2[channel][byte] = data_control_2.raw;
++ }
++ ctrl->dq_control_0[channel] = data_control_0.raw;
++ }
++}
++
++static void program_vsshi_control(struct sysinfo *ctrl, const uint16_t vsshi_mv)
++{
++ const uint32_t vsshi_control_reg = is_hsw_ult() ? 0x366c : 0x306c;
++ const union ddr_comp_vsshi_control_reg ddr_vsshi_control = {
++ .vsshi_target = (vsshi_mv * 192) / ctrl->vdd_mv - 20,
++ .hi_bw_divider = 1,
++ .lo_bw_divider = 1,
++ .bw_error = 2,
++ .panic_driver_en = 1,
++ .panic_voltage = 24 / 8, /* Voltage in 8mV steps */
++ .gain_boost = 1,
++ };
++ mchbar_write32(vsshi_control_reg, ddr_vsshi_control.raw);
++ mchbar_write32(DDR_COMP_VSSHI_CONTROL, ddr_vsshi_control.raw);
++}
++
++static void calc_vt_slope_code(const uint16_t slope, uint8_t *best_a, uint8_t *best_b)
++{
++ const int16_t coding[] = {0, -125, -62, -31, 250, 125, 62, 31};
++ *best_a = 0;
++ *best_b = 0;
++ int16_t best_err = slope;
++ for (uint8_t b = 0; b < ARRAY_SIZE(coding); b++) {
++ for (uint8_t a = b; a < ARRAY_SIZE(coding); a++) {
++ int16_t error = slope - (coding[a] + coding[b]);
++ if (error < 0)
++ error = -error;
++
++ if (error < best_err) {
++ best_err = error;
++ *best_a = a;
++ *best_b = b;
++ }
++ }
++ }
++}
++
++static void program_dimm_vref(struct sysinfo *ctrl, const uint16_t vccio_mv, const bool vddhi)
++{
++ const bool is_ult = is_hsw_ult();
++
++ /* Static values for ULT */
++ uint8_t vt_slope_a = 4;
++ uint8_t vt_slope_b = 0;
++ if (!is_ult) {
++ /* On non-ULT, compute best slope code */
++ const uint16_t vt_slope = 1500 * vccio_mv / ctrl->vdd_mv - 1000;
++ calc_vt_slope_code(vt_slope, &vt_slope_a, &vt_slope_b);
++ }
++ const union ddr_data_vref_control_reg ddr_vref_control = {
++ .hi_bw_divider = is_ult ? 0 : 3,
++ .lo_bw_divider = 3,
++ .sample_divider = is_ult ? 1 : 3,
++ .slow_bw_error = 1,
++ .hi_bw_enable = 1,
++ .vt_slope_b = vt_slope_b,
++ .vt_slope_a = vt_slope_a,
++ .vt_offset = 0,
++ };
++ mchbar_write32(is_ult ? 0xf68 : 0xf6c, ddr_vref_control.raw); /* Use CH1 byte 7 */
++
++ const union ddr_data_vref_adjust_reg ddr_vref_adjust = {
++ .en_dimm_vref_ca = 1,
++ .en_dimm_vref_ch0 = 1,
++ .en_dimm_vref_ch1 = 1,
++ .vccddq_hi_qnnn_h = vddhi,
++ .hi_z_timer_ctrl = 3,
++ };
++ ctrl->dimm_vref = ddr_vref_adjust;
++ mchbar_write32(DDR_DATA_VREF_ADJUST, ddr_vref_adjust.raw);
++}
++
++static uint32_t pi_code(const uint32_t code)
++{
++ return code << 21 | code << 14 | code << 7 | code << 0;
++}
++
++static void program_ddr_ca(struct sysinfo *ctrl, const bool vddhi)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ const union ddr_clk_controls_reg ddr_clk_controls = {
++ .dll_mask = 1,
++ .vccddq_hi = vddhi,
++ .lpddr_mode = ctrl->lpddr,
++ };
++ mchbar_write32(DDR_CLK_ch_CONTROLS(channel), ddr_clk_controls.raw);
++
++ const union ddr_cmd_controls_reg ddr_cmd_controls = {
++ .dll_mask = 1,
++ .vccddq_hi = vddhi,
++ .lpddr_mode = ctrl->lpddr,
++ .early_weak_drive = 3,
++ .cmd_tx_eq = 1,
++ };
++ mchbar_write32(DDR_CMD_ch_CONTROLS(channel), ddr_cmd_controls.raw);
++
++ const union ddr_cke_ctl_controls_reg ddr_cke_controls = {
++ .dll_mask = 1,
++ .vccddq_hi = vddhi,
++ .lpddr_mode = ctrl->lpddr,
++ .early_weak_drive = 3,
++ .cmd_tx_eq = 1,
++ .ctl_tx_eq = 1,
++ .ctl_sr_drv = 2,
++ };
++ mchbar_write32(DDR_CKE_ch_CTL_CONTROLS(channel), ddr_cke_controls.raw);
++
++ const union ddr_cke_ctl_controls_reg ddr_ctl_controls = {
++ .dll_mask = 1,
++ .vccddq_hi = vddhi,
++ .lpddr_mode = ctrl->lpddr,
++ .ctl_tx_eq = 1,
++ .ctl_sr_drv = 2,
++ .la_drv_en_ovrd = 1, /* Must be set on ULT */
++ };
++ mchbar_write32(DDR_CTL_ch_CTL_CONTROLS(channel), ddr_ctl_controls.raw);
++
++ const uint8_t cmd_pi = ctrl->lpddr ? 96 : 64;
++ mchbar_write32(DDR_CMD_ch_PI_CODING(channel), pi_code(cmd_pi));
++ mchbar_write32(DDR_CKE_ch_CMD_PI_CODING(channel), pi_code(cmd_pi));
++ mchbar_write32(DDR_CKE_CTL_ch_CTL_PI_CODING(channel), pi_code(64));
++ mchbar_write32(DDR_CLK_ch_PI_CODING(channel), pi_code(64));
++
++ mchbar_write32(DDR_CMD_ch_COMP_OFFSET(channel), 0);
++ mchbar_write32(DDR_CLK_ch_COMP_OFFSET(channel), 0);
++ mchbar_write32(DDR_CKE_CTL_ch_CTL_COMP_OFFSET(channel), 0);
++
++ for (uint8_t group = 0; group < NUM_GROUPS; group++) {
++ ctrl->cke_cmd_pi_code[channel][group] = cmd_pi;
++ ctrl->cmd_north_pi_code[channel][group] = cmd_pi;
++ ctrl->cmd_south_pi_code[channel][group] = cmd_pi;
++ }
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ ctrl->clk_pi_code[channel][rank] = 64;
++ ctrl->ctl_pi_code[channel][rank] = 64;
++ }
++ }
++}
++
++enum {
++ RCOMP_RD_ODT = 0,
++ RCOMP_WR_DS_DQ,
++ RCOMP_WR_DS_CMD,
++ RCOMP_WR_DS_CTL,
++ RCOMP_WR_DS_CLK,
++ RCOMP_MAX_CODES,
++};
++
++struct rcomp_info {
++ uint8_t resistor;
++ uint8_t sz_steps;
++ uint8_t target_r;
++ int8_t result;
++};
++
++static void program_rcomp_vref(struct sysinfo *ctrl, const bool dis_odt_static)
++{
++ const bool is_ult = is_hsw_ult();
++ /*
++ * +-------------------------------+
++ * | Rcomp resistor values in ohms |
++ * +-----------+------+------+-----+
++ * | Ball name | Trad | ULTX | Use |
++ * +-----------+------+------+-----+
++ * | SM_RCOMP0 | 100 | 200 | CMD |
++ * | SM_RCOMP1 | 75 | 120 | DQ |
++ * | SM_RCOMP2 | 100 | 100 | ODT |
++ * +-----------+------+------+-----+
++ */
++ struct rcomp_info rcomp_cfg[RCOMP_MAX_CODES] = {
++ [RCOMP_RD_ODT] = {
++ .resistor = 50,
++ .sz_steps = 96,
++ .target_r = 50,
++ },
++ [RCOMP_WR_DS_DQ] = {
++ .resistor = 25,
++ .sz_steps = 64,
++ .target_r = 33,
++ },
++ [RCOMP_WR_DS_CMD] = {
++ .resistor = 20,
++ .sz_steps = 64,
++ .target_r = 20,
++ },
++ [RCOMP_WR_DS_CTL] = {
++ .resistor = 20,
++ .sz_steps = 64,
++ .target_r = 20,
++ },
++ [RCOMP_WR_DS_CLK] = {
++ .resistor = 25,
++ .sz_steps = 64,
++ .target_r = 29,
++ },
++ };
++ if (is_ult) {
++ rcomp_cfg[RCOMP_WR_DS_DQ].resistor = 40;
++ rcomp_cfg[RCOMP_WR_DS_DQ].target_r = 40;
++ rcomp_cfg[RCOMP_WR_DS_CLK].resistor = 40;
++ } else if (ctrl->dpc[0] == 2 || ctrl->dpc[1] == 2) {
++ rcomp_cfg[RCOMP_RD_ODT].target_r = 60;
++ }
++ for (uint8_t i = 0; i < RCOMP_MAX_CODES; i++) {
++ struct rcomp_info *const r = &rcomp_cfg[i];
++ const int32_t div = 2 * (r->resistor + r->target_r);
++ assert(div);
++ const int32_t vref = (r->sz_steps * (r->resistor - r->target_r)) / div;
++
++ /* DqOdt is 5 bits wide, the other Rcomp targets are 4 bits wide */
++ const int8_t comp_limit = i == RCOMP_RD_ODT ? 16 : 8;
++ r->result = clamp_s32(-comp_limit, vref, comp_limit - 1);
++ }
++ const union ddr_comp_ctl_0_reg ddr_comp_ctl_0 = {
++ .disable_odt_static = dis_odt_static,
++ .dq_drv_vref = rcomp_cfg[RCOMP_WR_DS_DQ].result,
++ .dq_odt_vref = rcomp_cfg[RCOMP_RD_ODT].result,
++ .cmd_drv_vref = rcomp_cfg[RCOMP_WR_DS_CMD].result,
++ .ctl_drv_vref = rcomp_cfg[RCOMP_WR_DS_CTL].result,
++ .clk_drv_vref = rcomp_cfg[RCOMP_WR_DS_CLK].result,
++ };
++ ctrl->comp_ctl_0 = ddr_comp_ctl_0;
++ mchbar_write32(DDR_COMP_CTL_0, ctrl->comp_ctl_0.raw);
++}
++
++enum {
++ SCOMP_DQ = 0,
++ SCOMP_CMD,
++ SCOMP_CTL,
++ SCOMP_CLK,
++ SCOMP_MAX_CODES,
++};
++
++static void program_slew_rates(struct sysinfo *ctrl, const bool vddhi)
++{
++ const uint8_t min_cycle_delay[SCOMP_MAX_CODES] = { 46, 70, 70, 46 };
++ uint8_t buffer_stage_delay_ps[SCOMP_MAX_CODES] = { 59, 53, 53, 53 };
++ uint16_t comp_slew_rate_codes[SCOMP_MAX_CODES];
++
++ /* CMD Slew Rate = 1.8 for 2N */
++ if (ctrl->tCMD == 2)
++ buffer_stage_delay_ps[SCOMP_CMD] = 89;
++
++ /* CMD Slew Rate = 4 V/ns for double-pumped CMD bus */
++ if (ctrl->lpddr)
++ buffer_stage_delay_ps[SCOMP_CMD] = 63;
++
++ for (uint8_t i = 0; i < SCOMP_MAX_CODES; i++) {
++ uint16_t stages = DIV_ROUND_CLOSEST(ctrl->qclkps, buffer_stage_delay_ps[i]);
++ if (stages < 5)
++ stages = 5;
++
++ bool dll_pc = buffer_stage_delay_ps[i] < min_cycle_delay[i] || stages > 16;
++
++ /* Lock DLL... */
++ if (dll_pc)
++ comp_slew_rate_codes[i] = stages / 2 - 1; /* to a phase */
++ else
++ comp_slew_rate_codes[i] = (stages - 1) | BIT(4); /* to a cycle */
++ }
++ union ddr_comp_ctl_1_reg ddr_comp_ctl_1 = {
++ .dq_scomp = comp_slew_rate_codes[SCOMP_DQ],
++ .cmd_scomp = comp_slew_rate_codes[SCOMP_CMD],
++ .ctl_scomp = comp_slew_rate_codes[SCOMP_CTL],
++ .clk_scomp = comp_slew_rate_codes[SCOMP_CLK],
++ .vccddq_hi = vddhi,
++ };
++ ctrl->comp_ctl_1 = ddr_comp_ctl_1;
++ mchbar_write32(DDR_COMP_CTL_1, ctrl->comp_ctl_1.raw);
++}
++
++static uint32_t ln_x100(const uint32_t input_x100)
++{
++ uint32_t val = input_x100;
++ uint32_t ret = 0;
++ while (val > 271) {
++ val = (val * 1000) / 2718;
++ ret += 100;
++ }
++ return ret + (-16 * val * val + 11578 * val - 978860) / 10000;
++}
++
++static uint32_t compute_vsshi_vref(struct sysinfo *ctrl, const uint32_t vsshi_tgt, bool up)
++{
++ const uint32_t delta = 15;
++ const uint32_t c_die_vsshi = 2000;
++ const uint32_t r_cmd_ref = 100 * 10;
++ const uint32_t offset = up ? 64 : 0;
++ const uint32_t ln_vsshi = ln_x100((100 * vsshi_tgt) / (vsshi_tgt - delta));
++ const uint32_t r_target = (ctrl->qclkps * 2000) / (c_die_vsshi * ln_vsshi);
++ const uint32_t r_dividend = 128 * (up ? r_cmd_ref : r_target);
++ return r_dividend / (r_cmd_ref + r_target) - offset;
++}
++
++static void program_vsshi(struct sysinfo *ctrl, const uint16_t vccio_mv, const uint16_t vsshi)
++{
++ const uint16_t vsshi_down = vsshi + 24; /* Panic threshold of 24 mV */
++ const uint16_t vsshi_up = vccio_mv - vsshi_down;
++ const union ddr_comp_vsshi_reg ddr_comp_vsshi = {
++ .panic_drv_down_vref = compute_vsshi_vref(ctrl, vsshi_down, false),
++ .panic_drv_up_vref = compute_vsshi_vref(ctrl, vsshi_up, true),
++ .vt_offset = 128 * 450 / vccio_mv / 2,
++ .vt_slope_a = 4,
++ };
++ mchbar_write32(DDR_COMP_VSSHI, ddr_comp_vsshi.raw);
++}
++
++static void program_misc(struct sysinfo *ctrl)
++{
++ ctrl->misc_control_0.raw = mchbar_read32(DDR_SCRAM_MISC_CONTROL);
++ ctrl->misc_control_0.weaklock_latency = 12;
++ ctrl->misc_control_0.wl_sleep_cycles = 5;
++ ctrl->misc_control_0.wl_wake_cycles = 2;
++ mchbar_write32(DDR_SCRAM_MISC_CONTROL, ctrl->misc_control_0.raw);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ /* Keep scrambling disabled for training */
++ mchbar_write32(DDR_SCRAMBLE_ch(channel), 0);
++ }
++}
++
++/* Very weird, application-specific function */
++static void override_comp(uint32_t value, uint32_t width, uint32_t shift, uint32_t offset)
++{
++ const uint32_t mask = (1 << width) - 1;
++ uint32_t reg32 = mchbar_read32(offset);
++ reg32 &= ~(mask << shift);
++ reg32 |= (value << shift);
++ mchbar_write32(offset, reg32);
++}
++
++static void program_ls_comp(struct sysinfo *ctrl)
++{
++ /* Disable periodic COMP */
++ const union pcu_comp_reg m_comp = {
++ .comp_disable = 1,
++ .comp_interval = COMP_INT,
++ .comp_force = 1,
++ };
++ mchbar_write32(M_COMP, m_comp.raw);
++ udelay(10);
++
++ /* Override level shifter compensation */
++ const uint32_t ls_comp = 2;
++ override_comp(ls_comp, 3, 28, DDR_DATA_RCOMP_DATA_1);
++ override_comp(ls_comp, 3, 24, DDR_CMD_COMP);
++ override_comp(ls_comp, 3, 24, DDR_CKE_CTL_COMP);
++ override_comp(ls_comp, 3, 23, DDR_CLK_COMP);
++ override_comp(ls_comp, 3, 28, DDR_COMP_DATA_COMP_1);
++ override_comp(ls_comp, 3, 24, DDR_COMP_CMD_COMP);
++ override_comp(ls_comp, 4, 24, DDR_COMP_CTL_COMP);
++ override_comp(ls_comp, 4, 23, DDR_COMP_CLK_COMP);
++ override_comp(ls_comp, 3, 24, DDR_COMP_OVERRIDE);
++
++ /* Manually update the COMP values */
++ union ddr_scram_misc_control_reg ddr_scram_misc_ctrl = ctrl->misc_control_0;
++ ddr_scram_misc_ctrl.force_comp_update = 1;
++ mchbar_write32(DDR_SCRAM_MISC_CONTROL, ddr_scram_misc_ctrl.raw);
++
++ /* Use a fixed offset between ODT Up/Dn */
++ const union ddr_comp_data_comp_1_reg data_comp_1 = {
++ .raw = mchbar_read32(DDR_COMP_DATA_COMP_1),
++ };
++ const uint32_t odt_offset = data_comp_1.rcomp_odt_down - data_comp_1.rcomp_odt_up;
++ ctrl->comp_ctl_0.odt_up_down_off = odt_offset;
++ ctrl->comp_ctl_0.fixed_odt_offset = 1;
++ mchbar_write32(DDR_COMP_CTL_0, ctrl->comp_ctl_0.raw);
++}
++
++/** TODO: Deduplicate PCODE stuff, it's already implemented in CPU code **/
++static bool pcode_ready(void)
++{
++ const unsigned int delay_step = 10;
++ for (unsigned int i = 0; i < 1000; i += delay_step) {
++ if (!(mchbar_read32(BIOS_MAILBOX_INTERFACE) & MAILBOX_RUN_BUSY))
++ return true;
++
++ udelay(delay_step);
++ };
++ return false;
++}
++
++static uint32_t pcode_mailbox_read(const uint32_t command)
++{
++ if (!pcode_ready()) {
++ printk(BIOS_ERR, "PCODE: mailbox timeout on wait ready\n");
++ return 0;
++ }
++ mchbar_write32(BIOS_MAILBOX_INTERFACE, command | MAILBOX_RUN_BUSY);
++ if (!pcode_ready()) {
++ printk(BIOS_ERR, "PCODE: mailbox timeout on completion\n");
++ return 0;
++ }
++ return mchbar_read32(BIOS_MAILBOX_DATA);
++}
++
++static int pcode_mailbox_write(const uint32_t command, const uint32_t data)
++{
++ if (!pcode_ready()) {
++ printk(BIOS_ERR, "PCODE: mailbox timeout on wait ready\n");
++ return -1;
++ }
++ mchbar_write32(BIOS_MAILBOX_DATA, data);
++ mchbar_write32(BIOS_MAILBOX_INTERFACE, command | MAILBOX_RUN_BUSY);
++ if (!pcode_ready()) {
++ printk(BIOS_ERR, "PCODE: mailbox timeout on completion\n");
++ return -1;
++ }
++ return 0;
++}
++
++static void enable_2x_refresh(struct sysinfo *ctrl)
++{
++ if (!CONFIG(ENABLE_DDR_2X_REFRESH))
++ return;
++
++ printk(BIOS_DEBUG, "Enabling 2x Refresh\n");
++ const bool asr = ctrl->flags.asr;
++ const bool lpddr = ctrl->lpddr;
++
++ /* Mutually exclusive */
++ assert(!asr || !lpddr);
++ if (!asr) {
++ uint32_t reg32 = pcode_mailbox_read(MAILBOX_BIOS_CMD_READ_DDR_2X_REFRESH);
++ if (!(reg32 & BIT(31))) { /** TODO: What to do if this is locked? **/
++ reg32 |= BIT(0); /* Enable 2x refresh */
++ reg32 |= BIT(31); /* Lock */
++
++ if (lpddr)
++ reg32 |= 4 << 1; /* LPDDR MR4 1/2 tREFI */
++
++ if (pcode_mailbox_write(MAILBOX_BIOS_CMD_WRITE_DDR_2X_REFRESH, reg32))
++ printk(BIOS_ERR, "Could not enable Mailbox 2x Refresh\n");
++ }
++ if (!lpddr)
++ return;
++ }
++ assert(asr || lpddr);
++ uint16_t refi_reduction = 50;
++ if (lpddr) {
++ refi_reduction = 97;
++ mchbar_clrbits32(PCU_DDR_PTM_CTL, 1 << 7); /* DISABLE_DRAM_TS */
++ }
++ /** TODO: Remember why this is only done on cold boots **/
++ if (ctrl->bootmode == BOOTMODE_COLD) {
++ ctrl->tREFI *= refi_reduction;
++ ctrl->tREFI /= 100;
++ }
++}
++
++static void set_pcu_ddr_voltage(const uint16_t vdd_mv)
++{
++ /** TODO: Handle other voltages? **/
++ uint32_t pcu_ddr_voltage;
++ switch (vdd_mv) {
++ case 1200:
++ pcu_ddr_voltage = 3;
++ break;
++ case 1350:
++ pcu_ddr_voltage = 1;
++ break;
++ default:
++ case 1500:
++ pcu_ddr_voltage = 0;
++ break;
++ }
++ /* Set bits 0..2 */
++ mchbar_write32(PCU_DDR_VOLTAGE, pcu_ddr_voltage);
++}
++
++static void program_scheduler(struct sysinfo *ctrl)
++{
++ /*
++ * ZQ calibration needs to be serialized for LPDDR3. Otherwise,
++ * the processor issues LPDDR3 ZQ calibration in parallel when
++ * exiting Package C7 or deeper. This causes problems for dual
++ * and quad die packages since all ranks share the same ZQ pin.
++ *
++ * Erratum HSM94: LPDDR3 ZQ Calibration Following Deep Package
++ * C-state Exit May Lead to Unpredictable System Behavior
++ */
++ const union mcscheds_cbit_reg mcscheds_cbit = {
++ .dis_write_gap = 1,
++ .dis_odt = is_hsw_ult() && !(ctrl->lpddr && ctrl->lpddr_dram_odt),
++ .serialize_zq = ctrl->lpddr,
++ };
++ mchbar_write32(MCSCHEDS_CBIT, mcscheds_cbit.raw);
++ mchbar_write32(MCMNTS_SC_WDBWM, 0x553c3038);
++ if (ctrl->lpddr) {
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ union mcmain_command_rate_limit_reg cmd_rate_limit = {
++ .raw = mchbar_read32(COMMAND_RATE_LIMIT_ch(channel)),
++ };
++ cmd_rate_limit.enable_cmd_limit = 1;
++ cmd_rate_limit.cmd_rate_limit = 3;
++ mchbar_write32(COMMAND_RATE_LIMIT_ch(channel), cmd_rate_limit.raw);
++ }
++ }
++}
++
++static uint8_t biggest_channel(const struct sysinfo *const ctrl)
++{
++ _Static_assert(NUM_CHANNELS == 2, "Code assumes exactly two channels");
++ return !!(ctrl->channel_size_mb[0] < ctrl->channel_size_mb[1]);
++}
++
++static void dram_zones(struct sysinfo *ctrl)
++{
++ /** TODO: Activate channel hash here, if enabled **/
++ const uint8_t biggest = biggest_channel(ctrl);
++ const uint8_t smaller = !biggest;
++
++ /** TODO: Use stacked mode if Memory Trace is enabled **/
++ const union mad_chnl_reg mad_channel = {
++ .ch_a = biggest,
++ .ch_b = smaller,
++ .ch_c = 2,
++ .lpddr_mode = ctrl->lpddr,
++ };
++ mchbar_write32(MAD_CHNL, mad_channel.raw);
++
++ const uint8_t channel_b_zone_size = ctrl->channel_size_mb[smaller] / 256;
++ const union mad_zr_reg mad_zr = {
++ .ch_b_double = channel_b_zone_size * 2,
++ .ch_b_single = channel_b_zone_size,
++ };
++ mchbar_write32(MAD_ZR, mad_zr.raw);
++}
++
++static uint8_t biggest_dimm(const struct raminit_dimm_info *dimms)
++{
++ _Static_assert(NUM_SLOTS <= 2, "Code assumes at most two DIMMs per channel.");
++ if (NUM_SLOTS == 1)
++ return 0;
++
++ return !!(dimms[0].data.size_mb < dimms[1].data.size_mb);
++}
++
++static void dram_dimm_mapping(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel)) {
++ const union mad_dimm_reg mad_dimm = {
++ .rank_interleave = 1,
++ .enh_interleave = 1,
++ };
++ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
++ continue;
++ }
++ const uint8_t biggest = biggest_dimm(ctrl->dimms[channel]);
++ const uint8_t smaller = !biggest;
++ const struct dimm_attr_ddr3_st *dimm_a = &ctrl->dimms[channel][biggest].data;
++ const struct dimm_attr_ddr3_st *dimm_b = &ctrl->dimms[channel][smaller].data;
++ union mad_dimm_reg mad_dimm = {
++ .dimm_a_size = dimm_a->size_mb / 256,
++ .dimm_b_size = dimm_b->size_mb / 256,
++ .dimm_a_sel = biggest,
++ .dimm_a_ranks = dimm_a->ranks == 2,
++ .dimm_b_ranks = dimm_b->ranks == 2,
++ .dimm_a_width = dimm_a->width == 16,
++ .dimm_b_width = dimm_b->width == 16,
++ .rank_interleave = 1,
++ .enh_interleave = 1,
++ .ecc_mode = 0, /* Do not enable ECC yet */
++ };
++ if (is_hsw_ult())
++ mad_dimm.dimm_b_width = mad_dimm.dimm_a_width;
++
++ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
++ if (ctrl->lpddr)
++ die("%s: Missing LPDDR support (LPDDR_MR_PARAMS)\n", __func__);
++ }
++}
++
++enum raminit_status configure_mc(struct sysinfo *ctrl)
++{
++ const uint16_t vccio_mv = 1000;
++ const uint16_t vsshi_mv = ctrl->vdd_mv - 950;
++ const bool dis_odt_static = is_hsw_ult(); /* Disable static ODT legs on ULT */
++ const bool vddhi = ctrl->vdd_mv > 1350;
++
++ program_misc_control(ctrl);
++ program_mrc_revision();
++ program_ranks_used(ctrl);
++ program_ddr_data(ctrl, dis_odt_static, vddhi);
++ program_vsshi_control(ctrl, vsshi_mv);
++ program_dimm_vref(ctrl, vccio_mv, vddhi);
++ program_ddr_ca(ctrl, vddhi);
++ program_rcomp_vref(ctrl, dis_odt_static);
++ program_slew_rates(ctrl, vddhi);
++ program_vsshi(ctrl, vccio_mv, vsshi_mv);
++ program_misc(ctrl);
++ program_ls_comp(ctrl);
++ enable_2x_refresh(ctrl);
++ set_pcu_ddr_voltage(ctrl->vdd_mv);
++ configure_timings(ctrl);
++ configure_refresh(ctrl);
++ program_scheduler(ctrl);
++ dram_zones(ctrl);
++ dram_dimm_mapping(ctrl);
++
++ return RAMINIT_STATUS_SUCCESS;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 5f2be980d4..3a773cfa19 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -23,6 +23,7 @@ static const struct task_entry cold_boot[] = {
+ { collect_spd_info, true, "PROCSPD", },
+ { initialise_mpll, true, "INITMPLL", },
+ { convert_timings, true, "CONVTIM", },
++ { configure_mc, true, "CONFMC", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+@@ -54,6 +55,7 @@ static void initialize_ctrl(struct sysinfo *ctrl)
+
+ ctrl->cpu = cpu_get_cpuid();
+ ctrl->stepping = get_stepping(ctrl->cpu);
++ ctrl->vdd_mv = is_hsw_ult() ? 1350 : 1500; /** FIXME: Hardcoded, does it matter? **/
+ ctrl->dq_pins_interleaved = cfg->dq_pins_interleaved;
+ ctrl->bootmode = bootmode;
+ }
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 01e5ed1bd6..aa86b9aa39 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -3,15 +3,40 @@
+ #ifndef HASWELL_RAMINIT_NATIVE_H
+ #define HASWELL_RAMINIT_NATIVE_H
+
++#include <assert.h>
+ #include <device/dram/ddr3.h>
+ #include <northbridge/intel/haswell/haswell.h>
++#include <string.h>
++#include <types.h>
++
++#include "reg_structs.h"
+
+ #define SPD_LEN 256
+
++/* Each channel has 4 ranks, spread across 2 slots */
++#define NUM_SLOTRANKS 4
++
++#define NUM_GROUPS 2
++
+ /* 8 data lanes + 1 ECC lane */
+ #define NUM_LANES 9
+ #define NUM_LANES_NO_ECC 8
+
++#define COMP_INT 10
++
++/* Always use 12 legs for emphasis (not trained) */
++#define TXEQFULLDRV (3 << 4)
++
++enum command_training_iteration {
++ CT_ITERATION_CLOCK = 0,
++ CT_ITERATION_CMD_NORTH,
++ CT_ITERATION_CMD_SOUTH,
++ CT_ITERATION_CKE,
++ CT_ITERATION_CTL,
++ CT_ITERATION_CMD_VREF,
++ MAX_CT_ITERATION,
++};
++
+ enum raminit_boot_mode {
+ BOOTMODE_COLD,
+ BOOTMODE_WARM,
+@@ -57,6 +82,9 @@ struct sysinfo {
+ * LPDDR-specific functions have stubs which will halt upon execution.
+ */
+ bool lpddr;
++ bool lpddr_dram_odt;
++ uint8_t lpddr_cke_rank_map[NUM_CHANNELS];
++ uint8_t dq_byte_map[NUM_CHANNELS][MAX_CT_ITERATION][2];
+
+ struct raminit_dimm_info dimms[NUM_CHANNELS][NUM_SLOTS];
+ union dimm_flags_ddr3_st flags;
+@@ -93,16 +121,89 @@ struct sysinfo {
+ uint32_t mem_clock_mhz;
+ uint32_t mem_clock_fs; /* Memory clock period in femtoseconds */
+ uint32_t qclkps; /* Quadrature clock period in picoseconds */
++
++ uint16_t vdd_mv;
++
++ union ddr_scram_misc_control_reg misc_control_0;
++
++ union ddr_comp_ctl_0_reg comp_ctl_0;
++ union ddr_comp_ctl_1_reg comp_ctl_1;
++
++ union ddr_data_vref_adjust_reg dimm_vref;
++
++ uint32_t data_offset_train[NUM_CHANNELS][NUM_LANES];
++ uint32_t data_offset_comp[NUM_CHANNELS][NUM_LANES];
++
++ uint32_t dq_control_0[NUM_CHANNELS];
++ uint32_t dq_control_1[NUM_CHANNELS][NUM_LANES];
++ uint32_t dq_control_2[NUM_CHANNELS][NUM_LANES];
++
++ uint16_t tx_dq[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint16_t txdqs[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint8_t tx_eq[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++
++ uint16_t rcven[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint8_t rx_eq[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint8_t rxdqsp[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ uint8_t rxdqsn[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++ int8_t rxvref[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
++
++ uint8_t clk_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint8_t ctl_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint8_t cke_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
++
++ uint8_t cke_cmd_pi_code[NUM_CHANNELS][NUM_GROUPS];
++ uint8_t cmd_north_pi_code[NUM_CHANNELS][NUM_GROUPS];
++ uint8_t cmd_south_pi_code[NUM_CHANNELS][NUM_GROUPS];
+ };
+
++static inline bool is_hsw_ult(void)
++{
++ return CONFIG(INTEL_LYNXPOINT_LP);
++}
++
++static inline bool rank_in_mask(uint8_t rank, uint8_t rankmask)
++{
++ assert(rank < NUM_SLOTRANKS);
++ return !!(BIT(rank) & rankmask);
++}
++
++static inline bool does_ch_exist(const struct sysinfo *ctrl, uint8_t channel)
++{
++ return !!ctrl->dpc[channel];
++}
++
++static inline bool does_rank_exist(const struct sysinfo *ctrl, uint8_t rank)
++{
++ return rank_in_mask(rank, ctrl->rankmap[0] | ctrl->rankmap[1]);
++}
++
++static inline bool rank_in_ch(const struct sysinfo *ctrl, uint8_t rank, uint8_t channel)
++{
++ assert(channel < NUM_CHANNELS);
++ return rank_in_mask(rank, ctrl->rankmap[channel]);
++}
++
++/** TODO: Handling of data_offset_train could be improved, also coupled with reg updates **/
++static inline void clear_data_offset_train_all(struct sysinfo *ctrl)
++{
++ memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train));
++}
++
+ void raminit_main(enum raminit_boot_mode bootmode);
+
+ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
+ enum raminit_status initialise_mpll(struct sysinfo *ctrl);
+ enum raminit_status convert_timings(struct sysinfo *ctrl);
++enum raminit_status configure_mc(struct sysinfo *ctrl);
++
++void configure_timings(struct sysinfo *ctrl);
++void configure_refresh(struct sysinfo *ctrl);
+
+ enum raminit_status wait_for_first_rcomp(void);
+
++uint8_t get_rx_bias(const struct sysinfo *ctrl);
++
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+ uint32_t get_tREFI(uint32_t mem_clock_mhz);
+ uint32_t get_tXP(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+new file mode 100644
+index 0000000000..d11cda4b3d
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -0,0 +1,405 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef HASWELL_RAMINIT_REG_STRUCTS_H
++#define HASWELL_RAMINIT_REG_STRUCTS_H
++
++union ddr_data_rx_train_rank_reg {
++ struct __packed {
++ uint32_t rcven : 9; // Bits 8:0
++ uint32_t dqs_p : 6; // Bits 14:9
++ uint32_t rx_eq : 5; // Bits 19:15
++ uint32_t dqs_n : 6; // Bits 25:20
++ int32_t vref : 6; // Bits 31:26
++ };
++ uint32_t raw;
++};
++
++union ddr_data_tx_train_rank_reg {
++ struct __packed {
++ uint32_t dq_delay : 9; // Bits 8:0
++ uint32_t dqs_delay : 9; // Bits 17:9
++ uint32_t : 2; // Bits 19:18
++ uint32_t tx_eq : 6; // Bits 25:20
++ uint32_t : 6; // Bits 31:26
++ };
++ uint32_t raw;
++};
++
++union ddr_data_control_0_reg {
++ struct __packed {
++ uint32_t rx_training_mode : 1; // Bits 0:0
++ uint32_t wl_training_mode : 1; // Bits 1:1
++ uint32_t rl_training_mode : 1; // Bits 2:2
++ uint32_t samp_train_mode : 1; // Bits 3:3
++ uint32_t tx_on : 1; // Bits 4:4
++ uint32_t rf_on : 1; // Bits 5:5
++ uint32_t rx_pi_on : 1; // Bits 6:6
++ uint32_t tx_pi_on : 1; // Bits 7:7
++ uint32_t internal_clocks_on : 1; // Bits 8:8
++ uint32_t repeater_clocks_on : 1; // Bits 9:9
++ uint32_t tx_disable : 1; // Bits 10:10
++ uint32_t rx_disable : 1; // Bits 11:11
++ uint32_t tx_long : 1; // Bits 12:12
++ uint32_t rx_dqs_ctle : 2; // Bits 14:13
++ uint32_t rx_read_pointer : 3; // Bits 17:15
++ uint32_t driver_segment_enable : 1; // Bits 18:18
++ uint32_t data_vccddq_hi : 1; // Bits 19:19
++ uint32_t read_rf_rd : 1; // Bits 20:20
++ uint32_t read_rf_wr : 1; // Bits 21:21
++ uint32_t read_rf_rank : 2; // Bits 23:22
++ uint32_t force_odt_on : 1; // Bits 24:24
++ uint32_t odt_samp_off : 1; // Bits 25:25
++ uint32_t disable_odt_static : 1; // Bits 26:26
++ uint32_t ddr_cr_force_odt_on : 1; // Bits 27:27
++ uint32_t lpddr_mode : 1; // Bits 28:28
++ uint32_t en_read_preamble : 1; // Bits 29:29
++ uint32_t odt_samp_extend_en : 1; // Bits 30:30
++ uint32_t early_rleak_en : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_data_control_1_reg {
++ struct __packed {
++ int32_t ref_pi : 4; // Bits 3:0
++ uint32_t dll_mask : 2; // Bits 5:4
++ uint32_t dll_weaklock : 1; // Bits 6:6
++ uint32_t sdll_segment_disable : 3; // Bits 9:7
++ uint32_t rx_bias_ctl : 3; // Bits 12:10
++ int32_t odt_delay : 4; // Bits 16:13
++ uint32_t odt_duration : 3; // Bits 19:17
++ int32_t sense_amp_delay : 4; // Bits 23:20
++ uint32_t sense_amp_duration : 3; // Bits 26:24
++ uint32_t burst_end_odt_delay : 3; // Bits 29:27 *** TODO: Check Broadwell ***
++ uint32_t lpddr_long_odt_en : 1; // Bits 30:30
++ uint32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++/* NOTE: Bits 31:19 are only valid for Broadwell onwards */
++union ddr_data_control_2_reg {
++ struct __packed {
++ uint32_t rx_stagger_ctl : 5; // Bits 4:0
++ uint32_t force_bias_on : 1; // Bits 5:5
++ uint32_t force_rx_on : 1; // Bits 6:6
++ uint32_t leaker_comp : 2; // Bits 8:7
++ uint32_t rx_dqs_amp_offset : 4; // Bits 12:9
++ uint32_t rx_clk_stg_num : 5; // Bits 17:13
++ uint32_t wl_long_delay : 1; // Bits 18:18
++ uint32_t enable_vref_pwrdn : 1; // Bits 19:19
++ uint32_t ddr4_mode : 1; // Bits 20:20
++ uint32_t en_vddq_odt : 1; // Bits 21:21
++ uint32_t en_vtt_odt : 1; // Bits 22:22
++ uint32_t en_const_z_eq_tx : 1; // Bits 23:23
++ uint32_t tx_eq_dis : 1; // Bits 24:24
++ uint32_t rx_vref_prog_mfc : 1; // Bits 25:25
++ uint32_t cben : 3; // Bits 28:26
++ uint32_t tx_deskew_disable : 1; // Bits 29:29
++ uint32_t rx_deskew_disable : 1; // Bits 30:30
++ uint32_t dq_slew_dly_byp : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_data_comp_1_reg {
++ struct __packed {
++ uint32_t rcomp_odt_up : 6; // Bits 5:0
++ uint32_t : 3; // Bits 8:6
++ uint32_t rcomp_odt_down : 6; // Bits 14:9
++ uint32_t : 1; // Bits 15:15
++ uint32_t panic_drv_down : 6; // Bits 21:16
++ uint32_t panic_drv_up : 6; // Bits 27:22
++ uint32_t ls_comp : 3; // Bits 30:28
++ uint32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_ctl_0_reg {
++ struct __packed {
++ uint32_t : 3; // Bits 2:0
++ uint32_t disable_odt_static : 1; // Bits 3:3
++ uint32_t odt_up_down_off : 6; // Bits 9:4
++ uint32_t fixed_odt_offset : 1; // Bits 10:10
++ int32_t dq_drv_vref : 4; // Bits 14:11
++ int32_t dq_odt_vref : 5; // Bits 19:15
++ int32_t cmd_drv_vref : 4; // Bits 23:20
++ int32_t ctl_drv_vref : 4; // Bits 27:24
++ int32_t clk_drv_vref : 4; // Bits 31:28
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_ctl_1_reg {
++ struct __packed {
++ uint32_t dq_scomp : 5; // Bits 4:0
++ uint32_t cmd_scomp : 5; // Bits 9:5
++ uint32_t ctl_scomp : 5; // Bits 14:10
++ uint32_t clk_scomp : 5; // Bits 19:15
++ uint32_t tco_cmd_offset : 4; // Bits 23:20
++ uint32_t comp_clk_on : 1; // Bits 24:24
++ uint32_t vccddq_hi : 1; // Bits 25:25
++ uint32_t : 3; // Bits 28:26
++ uint32_t dis_quick_comp : 1; // Bits 29:29
++ uint32_t sin_step : 1; // Bits 30:30
++ uint32_t sin_step_adv : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_data_vref_adjust_reg {
++ struct __packed {
++ int32_t ca_vref_ctrl : 7;// Bits 6:0
++ int32_t ch1_vref_ctrl : 7;// Bits 13:7
++ int32_t ch0_vref_ctrl : 7;// Bits 20:14
++ uint32_t en_dimm_vref_ca : 1;// Bits 21:21
++ uint32_t en_dimm_vref_ch1 : 1;// Bits 22:22
++ uint32_t en_dimm_vref_ch0 : 1;// Bits 23:23
++ uint32_t hi_z_timer_ctrl : 2;// Bits 25:24
++ uint32_t vccddq_hi_qnnn_h : 1;// Bits 26:26
++ uint32_t : 2;// Bits 28:27
++ uint32_t ca_slow_bw : 1;// Bits 29:29
++ uint32_t ch0_slow_bw : 1;// Bits 30:30
++ uint32_t ch1_slow_bw : 1;// Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_data_vref_control_reg {
++ struct __packed {
++ uint32_t hi_bw_divider : 2; // Bits 1:0
++ uint32_t lo_bw_divider : 2; // Bits 3:2
++ uint32_t sample_divider : 3; // Bits 6:4
++ uint32_t open_loop : 1; // Bits 7:7
++ uint32_t slow_bw_error : 2; // Bits 9:8
++ uint32_t hi_bw_enable : 1; // Bits 10:10
++ uint32_t : 1; // Bits 11:11
++ uint32_t vt_slope_b : 3; // Bits 14:12
++ uint32_t vt_slope_a : 3; // Bits 17:15
++ uint32_t vt_offset : 3; // Bits 20:18
++ uint32_t sel_code : 3; // Bits 23:21
++ uint32_t output_code : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_vsshi_reg {
++ struct __packed {
++ uint32_t panic_drv_down_vref : 6; // Bits 5:0
++ uint32_t panic_drv_up_vref : 6; // Bits 11:6
++ uint32_t vt_offset : 5; // Bits 16:12
++ uint32_t vt_slope_a : 3; // Bits 19:17
++ uint32_t vt_slope_b : 3; // Bits 22:20
++ uint32_t : 9; // Bits 31:23
++ };
++ uint32_t raw;
++};
++
++union ddr_comp_vsshi_control_reg {
++ struct __packed {
++ uint32_t vsshi_target : 6; // Bits 5:0
++ uint32_t hi_bw_divider : 2; // Bits 7:6
++ uint32_t lo_bw_divider : 2; // Bits 9:8
++ uint32_t sample_divider : 3; // Bits 12:10
++ uint32_t open_loop : 1; // Bits 13:13
++ uint32_t bw_error : 2; // Bits 15:14
++ uint32_t panic_driver_en : 1; // Bits 16:16
++ uint32_t : 1; // Bits 17:17
++ uint32_t panic_voltage : 4; // Bits 21:18
++ uint32_t gain_boost : 1; // Bits 22:22
++ uint32_t sel_code : 1; // Bits 23:23
++ uint32_t output_code : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
++union ddr_clk_controls_reg {
++ struct __packed {
++ uint32_t ref_pi : 4; // Bits 3:0
++ uint32_t dll_mask : 2; // Bits 5:4
++ uint32_t : 1; // Bits 6:6
++ uint32_t tx_on : 1; // Bits 7:7
++ uint32_t internal_clocks_on : 1; // Bits 8:8
++ uint32_t repeater_clocks_on : 1; // Bits 9:9
++ uint32_t io_lb_ctl : 2; // Bits 11:10
++ uint32_t odt_mode : 1; // Bits 12:12
++ uint32_t : 8; // Bits 20:13
++ uint32_t rx_vref : 6; // Bits 26:21
++ uint32_t vccddq_hi : 1; // Bits 27:27
++ uint32_t dll_weaklock : 1; // Bits 28:28
++ uint32_t lpddr_mode : 1; // Bits 29:29
++ uint32_t : 2; // Bits 31:30
++ };
++ uint32_t raw;
++};
++
++union ddr_cmd_controls_reg {
++ struct __packed {
++ int32_t ref_pi : 4; // Bits 3:0
++ uint32_t dll_mask : 2; // Bits 5:4
++ uint32_t : 1; // Bits 6:6
++ uint32_t tx_on : 1; // Bits 7:7
++ uint32_t internal_clocks_on : 1; // Bits 8:8
++ uint32_t repeater_clocks_on : 1; // Bits 9:9
++ uint32_t io_lb_ctl : 2; // Bits 11:10
++ uint32_t odt_mode : 1; // Bits 12:12
++ uint32_t cmd_tx_eq : 2; // Bits 14:13
++ uint32_t early_weak_drive : 2; // Bits 16:15
++ uint32_t : 4; // Bits 20:17
++ int32_t rx_vref : 6; // Bits 26:21
++ uint32_t vccddq_hi : 1; // Bits 27:27
++ uint32_t dll_weaklock : 1; // Bits 28:28
++ uint32_t lpddr_mode : 1; // Bits 29:29
++ uint32_t lpddr_ca_a_dis : 1; // Bits 30:30
++ uint32_t lpddr_ca_b_dis : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++/* Same register definition for CKE and CTL fubs */
++union ddr_cke_ctl_controls_reg {
++ struct __packed {
++ int32_t ref_pi : 4; // Bits 3:0
++ uint32_t dll_mask : 2; // Bits 5:4
++ uint32_t : 1; // Bits 6:6
++ uint32_t tx_on : 1; // Bits 7:7
++ uint32_t internal_clocks_on : 1; // Bits 8:8
++ uint32_t repeater_clocks_on : 1; // Bits 9:9
++ uint32_t io_lb_ctl : 2; // Bits 11:10
++ uint32_t odt_mode : 1; // Bits 12:12
++ uint32_t cmd_tx_eq : 2; // Bits 14:13
++ uint32_t early_weak_drive : 2; // Bits 16:15
++ uint32_t ctl_tx_eq : 2; // Bits 18:17
++ uint32_t ctl_sr_drv : 2; // Bits 20:19
++ int32_t rx_vref : 6; // Bits 26:21
++ uint32_t vccddq_hi : 1; // Bits 27:27
++ uint32_t dll_weaklock : 1; // Bits 28:28
++ uint32_t lpddr_mode : 1; // Bits 29:29
++ uint32_t la_drv_en_ovrd : 1; // Bits 30:30
++ uint32_t lpddr_ca_a_dis : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union ddr_scram_misc_control_reg {
++ struct __packed {
++ uint32_t wl_wake_cycles : 2; // Bits 1:0
++ uint32_t wl_sleep_cycles : 3; // Bits 4:2
++ uint32_t force_comp_update : 1; // Bits 5:5
++ uint32_t weaklock_latency : 4; // Bits 9:6
++ uint32_t ddr_no_ch_interleave : 1; // Bits 10:10
++ uint32_t lpddr_mode : 1; // Bits 11:11
++ uint32_t cke_mapping_ch0 : 4; // Bits 15:12
++ uint32_t cke_mapping_ch1 : 4; // Bits 19:16
++ uint32_t : 12; // Bits 31:20
++ };
++ uint32_t raw;
++};
++
++union mcscheds_cbit_reg {
++ struct __packed {
++ uint32_t dis_opp_cas : 1; // Bits 0:0
++ uint32_t dis_opp_is_cas : 1; // Bits 1:1
++ uint32_t dis_opp_ras : 1; // Bits 2:2
++ uint32_t dis_opp_is_ras : 1; // Bits 3:3
++ uint32_t dis_1c_byp : 1; // Bits 4:4
++ uint32_t dis_2c_byp : 1; // Bits 5:5
++ uint32_t dis_deprd_opt : 1; // Bits 6:6
++ uint32_t dis_pt_it : 1; // Bits 7:7
++ uint32_t dis_prcnt_ring : 1; // Bits 8:8
++ uint32_t dis_prcnt_sa : 1; // Bits 9:9
++ uint32_t dis_blkr_ph : 1; // Bits 10:10
++ uint32_t dis_blkr_pe : 1; // Bits 11:11
++ uint32_t dis_blkr_pm : 1; // Bits 12:12
++ uint32_t dis_odt : 1; // Bits 13:13
++ uint32_t oe_always_off : 1; // Bits 14:14
++ uint32_t : 1; // Bits 15:15
++ uint32_t dis_aom : 1; // Bits 16:16
++ uint32_t block_rpq : 1; // Bits 17:17
++ uint32_t block_wpq : 1; // Bits 18:18
++ uint32_t invert_align : 1; // Bits 19:19
++ uint32_t dis_write_gap : 1; // Bits 20:20
++ uint32_t dis_zq : 1; // Bits 21:21
++ uint32_t dis_tt : 1; // Bits 22:22
++ uint32_t dis_opp_ref : 1; // Bits 23:23
++ uint32_t long_zq : 1; // Bits 24:24
++ uint32_t dis_srx_zq : 1; // Bits 25:25
++ uint32_t serialize_zq : 1; // Bits 26:26
++ uint32_t zq_fast_exec : 1; // Bits 27:27
++ uint32_t dis_drive_nop : 1; // Bits 28:28
++ uint32_t pres_wdb_ent : 1; // Bits 29:29
++ uint32_t dis_clk_gate : 1; // Bits 30:30
++ uint32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union mcmain_command_rate_limit_reg {
++ struct __packed {
++ uint32_t enable_cmd_limit : 1; // Bits 0:0
++ uint32_t cmd_rate_limit : 3; // Bits 3:1
++ uint32_t reset_on_command : 4; // Bits 7:4
++ uint32_t reset_delay : 4; // Bits 11:8
++ uint32_t ck_to_cke_delay : 2; // Bits 13:12
++ uint32_t : 17; // Bits 30:14
++ uint32_t init_mrw_2n_cs : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union mad_chnl_reg {
++ struct __packed {
++ uint32_t ch_a : 2; // Bits 1:0
++ uint32_t ch_b : 2; // Bits 3:2
++ uint32_t ch_c : 2; // Bits 5:4
++ uint32_t stacked_mode : 1; // Bits 6:6
++ uint32_t stkd_mode_bits : 3; // Bits 9:7
++ uint32_t lpddr_mode : 1; // Bits 10:10
++ uint32_t : 21; // Bits 31:11
++ };
++ uint32_t raw;
++};
++
++union mad_dimm_reg {
++ struct __packed {
++ uint32_t dimm_a_size : 8; // Bits 7:0
++ uint32_t dimm_b_size : 8; // Bits 15:8
++ uint32_t dimm_a_sel : 1; // Bits 16:16
++ uint32_t dimm_a_ranks : 1; // Bits 17:17
++ uint32_t dimm_b_ranks : 1; // Bits 18:18
++ uint32_t dimm_a_width : 1; // Bits 19:19
++ uint32_t dimm_b_width : 1; // Bits 20:20
++ uint32_t rank_interleave : 1; // Bits 21:21
++ uint32_t enh_interleave : 1; // Bits 22:22
++ uint32_t : 1; // Bits 23:23
++ uint32_t ecc_mode : 2; // Bits 25:24
++ uint32_t hori_mode : 1; // Bits 26:26
++ uint32_t hori_address : 3; // Bits 29:27
++ uint32_t : 2; // Bits 31:30
++ };
++ uint32_t raw;
++};
++
++union mad_zr_reg {
++ struct __packed {
++ uint32_t : 16; // Bits 15:0
++ uint32_t ch_b_double : 8; // Bits 23:16
++ uint32_t ch_b_single : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
++/* Same definition for P_COMP, M_COMP, D_COMP */
++union pcu_comp_reg {
++ struct __packed {
++ uint32_t comp_disable : 1; // Bits 0:0
++ uint32_t comp_interval : 4; // Bits 4:1
++ uint32_t : 3; // Bits 7:5
++ uint32_t comp_force : 1; // Bits 8:8
++ uint32_t : 23; // Bits 31:9
++ };
++ uint32_t raw;
++};
++
++#endif
+diff --git a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+new file mode 100644
+index 0000000000..a9d960f31b
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+@@ -0,0 +1,13 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include "raminit_native.h"
++
++void configure_timings(struct sysinfo *ctrl)
++{
++ /** TODO: Stub **/
++}
++
++void configure_refresh(struct sysinfo *ctrl)
++{
++ /** TODO: Stub **/
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 45f8174995..4c3f399b5d 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -7,9 +7,98 @@
+ #define NUM_CHANNELS 2
+ #define NUM_SLOTS 2
+
++/* Indexed register helper macros */
++#define _DDRIO_C_R_B(r, ch, rank, byte) ((r) + 0x100 * (ch) + 0x4 * (rank) + 0x200 * (byte))
++#define _MCMAIN_C_X(r, ch, x) ((r) + 0x400 * (ch) + 0x4 * (x))
++#define _MCMAIN_C(r, ch) ((r) + 0x400 * (ch))
++
+ /* Register definitions */
++
++/* DDR DATA per-channel per-bytelane */
++#define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
++
++/* DDR CKE per-channel */
++#define DDR_CKE_ch_CMD_COMP_OFFSET(ch) _DDRIO_C_R_B(0x1204, ch, 0, 0)
++#define DDR_CKE_ch_CMD_PI_CODING(ch) _DDRIO_C_R_B(0x1208, ch, 0, 0)
++
++#define DDR_CKE_ch_CTL_CONTROLS(ch) _DDRIO_C_R_B(0x121c, ch, 0, 0)
++#define DDR_CKE_ch_CTL_RANKS_USED(ch) _DDRIO_C_R_B(0x1220, ch, 0, 0)
++
++/* DDR CTL per-channel */
++#define DDR_CTL_ch_CTL_CONTROLS(ch) _DDRIO_C_R_B(0x1c1c, ch, 0, 0)
++#define DDR_CTL_ch_CTL_RANKS_USED(ch) _DDRIO_C_R_B(0x1c20, ch, 0, 0)
++
++/* DDR CLK per-channel */
++#define DDR_CLK_ch_RANKS_USED(ch) _DDRIO_C_R_B(0x1800, ch, 0, 0)
++#define DDR_CLK_ch_COMP_OFFSET(ch) _DDRIO_C_R_B(0x1808, ch, 0, 0)
++#define DDR_CLK_ch_PI_CODING(ch) _DDRIO_C_R_B(0x180c, ch, 0, 0)
++#define DDR_CLK_ch_CONTROLS(ch) _DDRIO_C_R_B(0x1810, ch, 0, 0)
++
++/* DDR Scrambler */
++#define DDR_SCRAMBLE_ch(ch) (0x2000 + 4 * (ch))
++#define DDR_SCRAM_MISC_CONTROL 0x2008
++
++/* DDR CMDN/CMDS per-channel (writes go to both CMDN and CMDS fubs) */
++#define DDR_CMD_ch_COMP_OFFSET(ch) _DDRIO_C_R_B(0x3204, ch, 0, 0)
++#define DDR_CMD_ch_PI_CODING(ch) _DDRIO_C_R_B(0x3208, ch, 0, 0)
++#define DDR_CMD_ch_CONTROLS(ch) _DDRIO_C_R_B(0x320c, ch, 0, 0)
++
++/* DDR CKE/CTL per-channel (writes go to both CKE and CTL fubs) */
++#define DDR_CKE_CTL_ch_CTL_COMP_OFFSET(ch) _DDRIO_C_R_B(0x3414, ch, 0, 0)
++#define DDR_CKE_CTL_ch_CTL_PI_CODING(ch) _DDRIO_C_R_B(0x3418, ch, 0, 0)
++
++/* DDR DATA broadcast */
++#define DDR_DATA_RX_TRAIN_RANK(rank) _DDRIO_C_R_B(0x3600, 0, rank, 0)
++#define DDR_DATA_RX_PER_BIT_RANK(rank) _DDRIO_C_R_B(0x3610, 0, rank, 0)
++#define DDR_DATA_TX_TRAIN_RANK(rank) _DDRIO_C_R_B(0x3620, 0, rank, 0)
++#define DDR_DATA_TX_PER_BIT_RANK(rank) _DDRIO_C_R_B(0x3630, 0, rank, 0)
++
++#define DDR_DATA_RCOMP_DATA_1 0x3644
++#define DDR_DATA_TX_XTALK 0x3648
++#define DDR_DATA_RX_OFFSET_VDQ 0x364c
++#define DDR_DATA_OFFSET_COMP 0x365c
++#define DDR_DATA_CONTROL_1 0x3660
++
++#define DDR_DATA_OFFSET_TRAIN 0x3670
++#define DDR_DATA_CONTROL_0 0x3674
++#define DDR_DATA_VREF_ADJUST 0x3678
++
++/* DDR CMD broadcast */
++#define DDR_CMD_COMP 0x3700
++
++/* DDR CKE/CTL broadcast */
++#define DDR_CKE_CTL_COMP 0x3810
++
++/* DDR CLK broadcast */
++#define DDR_CLK_COMP 0x3904
++#define DDR_CLK_CONTROLS 0x3910
++#define DDR_CLK_CB_STATUS 0x3918
++
++/* DDR COMP (global) */
++#define DDR_COMP_DATA_COMP_1 0x3a04
++#define DDR_COMP_CMD_COMP 0x3a08
++#define DDR_COMP_CTL_COMP 0x3a0c
++#define DDR_COMP_CLK_COMP 0x3a10
++#define DDR_COMP_CTL_0 0x3a14
++#define DDR_COMP_CTL_1 0x3a18
++#define DDR_COMP_VSSHI 0x3a1c
++#define DDR_COMP_OVERRIDE 0x3a20
++#define DDR_COMP_VSSHI_CONTROL 0x3a24
++
++/* MCMAIN per-channel */
++#define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
++
++#define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
++
++/* MCMAIN broadcast */
++#define MCSCHEDS_CBIT 0x4c20
++
++#define MCMNTS_SC_WDBWM 0x4f8c
++
++/* MCDECS */
+ #define MAD_CHNL 0x5000 /* Address Decoder Channel Configuration */
+ #define MAD_DIMM(ch) (0x5004 + (ch) * 4)
++#define MAD_ZR 0x5014
+ #define MC_INIT_STATE_G 0x5030
+ #define MRC_REVISION 0x5034 /* MRC Revision */
+
+@@ -28,6 +117,8 @@
+
+ #define PCU_DDR_PTM_CTL 0x5880
+
++#define PCU_DDR_VOLTAGE 0x58a4
++
+ /* Some power MSRs are also represented in MCHBAR */
+ #define MCH_PKG_POWER_LIMIT_LO 0x59a0
+ #define MCH_PKG_POWER_LIMIT_HI 0x59a4
+@@ -48,6 +139,8 @@
+ #define MAILBOX_BIOS_CMD_FSM_MEASURE_INTVL 0x909
+ #define MAILBOX_BIOS_CMD_READ_PCH_POWER 0xa
+ #define MAILBOX_BIOS_CMD_READ_PCH_POWER_EXT 0xb
++#define MAILBOX_BIOS_CMD_READ_DDR_2X_REFRESH 0x17
++#define MAILBOX_BIOS_CMD_WRITE_DDR_2X_REFRESH 0x18
+ #define MAILBOX_BIOS_CMD_READ_C9C10_VOLTAGE 0x26
+ #define MAILBOX_BIOS_CMD_WRITE_C9C10_VOLTAGE 0x27
+
+@@ -66,6 +159,7 @@
+ #define MC_BIOS_REQ 0x5e00 /* Memory frequency request register */
+ #define MC_BIOS_DATA 0x5e04 /* Miscellaneous information for BIOS */
+ #define SAPMCTL 0x5f00
++#define M_COMP 0x5f08
+
+ #define HDAUDRID 0x6008
+ #define UMAGFXCTL 0x6020
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch b/config/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch
new file mode 100644
index 00000000..1b88f350
--- /dev/null
+++ b/config/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch
@@ -0,0 +1,541 @@
+From b64d728bfe7c8ee44af252338257e95d87864659 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 20:59:58 +0200
+Subject: [PATCH 14/26] haswell NRI: Add timings/refresh programming
+
+Program the registers with timing and refresh parameters.
+
+Change-Id: Id2ea339d2c9ea8b56c71d6e88ec76949653ff5c2
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/lookup_timings.c | 102 ++++++++
+ .../haswell/native_raminit/raminit_native.h | 14 ++
+ .../haswell/native_raminit/reg_structs.h | 93 +++++++
+ .../haswell/native_raminit/timings_refresh.c | 233 +++++++++++++++++-
+ .../intel/haswell/registers/mchbar.h | 12 +
+ 5 files changed, 452 insertions(+), 2 deletions(-)
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+index 038686c844..afe2c615d2 100644
+--- a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
++++ b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c
+@@ -60,3 +60,105 @@ uint32_t get_tXP(const uint32_t mem_clock_mhz)
+ };
+ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
+ }
++
++static uint32_t get_lpddr_tCKE(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 533, 4 },
++ { 666, 5 },
++ { fmax, 6 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++static uint32_t get_ddr_tCKE(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 533, 3 },
++ { 800, 4 },
++ { 933, 5 },
++ { 1200, 6 },
++ { fmax, 7 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++uint32_t get_tCKE(const uint32_t mem_clock_mhz, const bool lpddr)
++{
++ return lpddr ? get_lpddr_tCKE(mem_clock_mhz) : get_ddr_tCKE(mem_clock_mhz);
++}
++
++uint32_t get_tXPDLL(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 400, 10 },
++ { 533, 13 },
++ { 666, 16 },
++ { 800, 20 },
++ { 933, 23 },
++ { 1066, 26 },
++ { 1200, 29 },
++ { fmax, 32 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++uint32_t get_tAONPD(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 400, 4 },
++ { 533, 5 },
++ { 666, 6 },
++ { 800, 7 }, /* SNB had 8 */
++ { 933, 8 },
++ { 1066, 10 },
++ { 1200, 11 },
++ { fmax, 12 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++uint32_t get_tMOD(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 800, 12 },
++ { 933, 14 },
++ { 1066, 16 },
++ { 1200, 18 },
++ { fmax, 20 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++uint32_t get_tXS_offset(const uint32_t mem_clock_mhz)
++{
++ return DIV_ROUND_UP(mem_clock_mhz, 100);
++}
++
++static uint32_t get_lpddr_tZQOPER(const uint32_t mem_clock_mhz)
++{
++ return (mem_clock_mhz * 360) / 1000;
++}
++
++static uint32_t get_ddr_tZQOPER(const uint32_t mem_clock_mhz)
++{
++ const struct timing_lookup lut[] = {
++ { 800, 256 },
++ { 933, 299 },
++ { 1066, 342 },
++ { 1200, 384 },
++ { fmax, 427 },
++ };
++ return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut));
++}
++
++/* tZQOPER defines the period required for ZQCL after SR exit */
++uint32_t get_tZQOPER(const uint32_t mem_clock_mhz, const bool lpddr)
++{
++ return lpddr ? get_lpddr_tZQOPER(mem_clock_mhz) : get_ddr_tZQOPER(mem_clock_mhz);
++}
++
++uint32_t get_tZQCS(const uint32_t mem_clock_mhz, const bool lpddr)
++{
++ return DIV_ROUND_UP(get_tZQOPER(mem_clock_mhz, lpddr), 4);
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index aa86b9aa39..cd1f2eb2a5 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -155,6 +155,12 @@ struct sysinfo {
+ uint8_t cke_cmd_pi_code[NUM_CHANNELS][NUM_GROUPS];
+ uint8_t cmd_north_pi_code[NUM_CHANNELS][NUM_GROUPS];
+ uint8_t cmd_south_pi_code[NUM_CHANNELS][NUM_GROUPS];
++
++ union tc_bank_reg tc_bank[NUM_CHANNELS];
++ union tc_bank_rank_a_reg tc_bankrank_a[NUM_CHANNELS];
++ union tc_bank_rank_b_reg tc_bankrank_b[NUM_CHANNELS];
++ union tc_bank_rank_c_reg tc_bankrank_c[NUM_CHANNELS];
++ union tc_bank_rank_d_reg tc_bankrank_d[NUM_CHANNELS];
+ };
+
+ static inline bool is_hsw_ult(void)
+@@ -200,6 +206,14 @@ enum raminit_status configure_mc(struct sysinfo *ctrl);
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+
++uint32_t get_tCKE(uint32_t mem_clock_mhz, bool lpddr);
++uint32_t get_tXPDLL(uint32_t mem_clock_mhz);
++uint32_t get_tAONPD(uint32_t mem_clock_mhz);
++uint32_t get_tMOD(uint32_t mem_clock_mhz);
++uint32_t get_tXS_offset(uint32_t mem_clock_mhz);
++uint32_t get_tZQOPER(uint32_t mem_clock_mhz, bool lpddr);
++uint32_t get_tZQCS(uint32_t mem_clock_mhz, bool lpddr);
++
+ enum raminit_status wait_for_first_rcomp(void);
+
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index d11cda4b3d..70487e1640 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -335,6 +335,99 @@ union mcscheds_cbit_reg {
+ uint32_t raw;
+ };
+
++union tc_bank_reg {
++ struct __packed {
++ uint32_t tRCD : 5; // Bits 4:0
++ uint32_t tRP : 5; // Bits 9:5
++ uint32_t tRAS : 6; // Bits 15:10
++ uint32_t tRDPRE : 4; // Bits 19:16
++ uint32_t tWRPRE : 6; // Bits 25:20
++ uint32_t tRRD : 4; // Bits 29:26
++ uint32_t tRPab_ext : 2; // Bits 31:30
++ };
++ uint32_t raw;
++};
++
++union tc_bank_rank_a_reg {
++ struct __packed {
++ uint32_t tCKE : 4; // Bits 3:0
++ uint32_t tFAW : 8; // Bits 11:4
++ uint32_t tRDRD_sr : 3; // Bits 14:12
++ uint32_t tRDRD_dr : 4; // Bits 18:15
++ uint32_t tRDRD_dd : 4; // Bits 22:19
++ uint32_t tRDPDEN : 5; // Bits 27:23
++ uint32_t : 1; // Bits 28:28
++ uint32_t cmd_3st_dis : 1; // Bits 29:29
++ uint32_t cmd_stretch : 2; // Bits 31:30
++ };
++ uint32_t raw;
++};
++
++union tc_bank_rank_b_reg {
++ struct __packed {
++ uint32_t tWRRD_sr : 6; // Bits 5:0
++ uint32_t tWRRD_dr : 4; // Bits 9:6
++ uint32_t tWRRD_dd : 4; // Bits 13:10
++ uint32_t tWRWR_sr : 3; // Bits 16:14
++ uint32_t tWRWR_dr : 4; // Bits 20:17
++ uint32_t tWRWR_dd : 4; // Bits 24:21
++ uint32_t tWRPDEN : 6; // Bits 30:25
++ uint32_t dec_wrd : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union tc_bank_rank_c_reg {
++ struct __packed {
++ uint32_t tXPDLL : 6; // Bits 5:0
++ uint32_t tXP : 4; // Bits 9:6
++ uint32_t tAONPD : 4; // Bits 13:10
++ uint32_t tRDWR_sr : 5; // Bits 18:14
++ uint32_t tRDWR_dr : 5; // Bits 23:19
++ uint32_t tRDWR_dd : 5; // Bits 28:24
++ uint32_t : 3; // Bits 31:29
++ };
++ uint32_t raw;
++};
++
++/* NOTE: Non-ULT only implements the lower 21 bits (odt_write_delay is 2 bits) */
++union tc_bank_rank_d_reg {
++ struct __packed {
++ uint32_t tAA : 5; // Bits 4:0
++ uint32_t tCWL : 5; // Bits 9:5
++ uint32_t tCPDED : 2; // Bits 11:10
++ uint32_t tPRPDEN : 2; // Bits 13:12
++ uint32_t odt_read_delay : 3; // Bits 16:14
++ uint32_t odt_read_duration : 2; // Bits 18:17
++ uint32_t odt_write_duration : 3; // Bits 21:19
++ uint32_t odt_write_delay : 3; // Bits 24:22
++ uint32_t odt_always_rank_0 : 1; // Bits 25:25
++ uint32_t cmd_delay : 2; // Bits 27:26
++ uint32_t : 4; // Bits 31:28
++ };
++ uint32_t raw;
++};
++
++union tc_rftp_reg {
++ struct __packed {
++ uint32_t tREFI : 16; // Bits 15:0
++ uint32_t tRFC : 9; // Bits 24:16
++ uint32_t tREFIx9 : 7; // Bits 31:25
++ };
++ uint32_t raw;
++};
++
++union tc_srftp_reg {
++ struct __packed {
++ uint32_t tXSDLL : 12; // Bits 11:0
++ uint32_t tXS_offset : 4; // Bits 15:12
++ uint32_t tZQOPER : 10; // Bits 25:16
++ uint32_t : 2; // Bits 27:26
++ uint32_t tMOD : 4; // Bits 31:28
++ };
++ uint32_t raw;
++};
++
+ union mcmain_command_rate_limit_reg {
+ struct __packed {
+ uint32_t enable_cmd_limit : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+index a9d960f31b..20a05b359b 100644
+--- a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
++++ b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c
+@@ -1,13 +1,242 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
++#include <assert.h>
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++
+ #include "raminit_native.h"
+
++#define BL 8 /* Burst length */
++#define tCCD 4
++#define tRPRE 1
++#define tWPRE 1
++#define tDLLK 512
++
++static bool is_sodimm(const enum spd_dimm_type_ddr3 type)
++{
++ return type == SPD_DDR3_DIMM_TYPE_SO_DIMM || type == SPD_DDR3_DIMM_TYPE_72B_SO_UDIMM;
++}
++
++static uint8_t get_odt_stretch(const struct sysinfo *const ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ /* Only stretch with 2 DIMMs per channel */
++ if (ctrl->dpc[channel] != 2)
++ continue;
++
++ const struct raminit_dimm_info *dimms = ctrl->dimms[channel];
++
++ /* Only stretch when using SO-DIMMs */
++ if (!is_sodimm(dimms[0].data.dimm_type) || !is_sodimm(dimms[1].data.dimm_type))
++ continue;
++
++ /* Only stretch with mismatched card types */
++ if (dimms[0].data.reference_card == dimms[1].data.reference_card)
++ continue;
++
++ /* Stretch if one SO-DIMM is card F */
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (dimms[slot].data.reference_card == 5)
++ return 1;
++ }
++ }
++ return 0;
++}
++
++static union tc_bank_reg make_tc_bank(struct sysinfo *const ctrl)
++{
++ return (union tc_bank_reg) {
++ .tRCD = ctrl->tRCD,
++ .tRP = ctrl->tRP,
++ .tRAS = ctrl->tRAS,
++ .tRDPRE = ctrl->tRTP,
++ .tWRPRE = 4 + ctrl->tCWL + ctrl->tWR,
++ .tRRD = ctrl->tRRD,
++ .tRPab_ext = 0, /** TODO: For LPDDR, this is ctrl->tRPab - ctrl->tRP **/
++ };
++}
++
++static union tc_bank_rank_a_reg make_tc_bankrank_a(struct sysinfo *ctrl, uint8_t odt_stretch)
++{
++ /* Use 3N mode for DDR during training, but always use 1N mode for LPDDR */
++ const uint32_t tCMD = ctrl->lpddr ? 0 : 3;
++ const uint32_t tRDRD_drdd = BL / 2 + 1 + tRPRE + odt_stretch + !!ctrl->lpddr;
++
++ return (union tc_bank_rank_a_reg) {
++ .tCKE = get_tCKE(ctrl->mem_clock_mhz, ctrl->lpddr),
++ .tFAW = ctrl->tFAW,
++ .tRDRD_sr = tCCD,
++ .tRDRD_dr = tRDRD_drdd,
++ .tRDRD_dd = tRDRD_drdd,
++ .tRDPDEN = ctrl->tAA + BL / 2 + 1,
++ .cmd_3st_dis = 1, /* Disable command tri-state before training */
++ .cmd_stretch = tCMD,
++ };
++}
++
++static union tc_bank_rank_b_reg make_tc_bankrank_b(struct sysinfo *const ctrl)
++{
++ const uint8_t tWRRD_drdd = ctrl->tCWL - ctrl->tAA + BL / 2 + 2 + tRPRE;
++ const uint8_t tWRWR_drdd = BL / 2 + 2 + tWPRE;
++
++ return (union tc_bank_rank_b_reg) {
++ .tWRRD_sr = tCCD + ctrl->tCWL + ctrl->tWTR + 2,
++ .tWRRD_dr = ctrl->lpddr ? 8 : tWRRD_drdd,
++ .tWRRD_dd = ctrl->lpddr ? 8 : tWRRD_drdd,
++ .tWRWR_sr = tCCD,
++ .tWRWR_dr = tWRWR_drdd,
++ .tWRWR_dd = tWRWR_drdd,
++ .tWRPDEN = ctrl->tWR + ctrl->tCWL + BL / 2,
++ .dec_wrd = ctrl->tCWL >= 6,
++ };
++}
++
++static uint32_t get_tRDWR_sr(const struct sysinfo *ctrl)
++{
++ if (ctrl->lpddr) {
++ const uint32_t tdqsck_max = DIV_ROUND_UP(5500, ctrl->qclkps * 2);
++ return ctrl->tAA - ctrl->tCWL + tCCD + tWPRE + tdqsck_max + 1;
++ } else {
++ const bool fast_clock = ctrl->mem_clock_mhz > 666;
++ return ctrl->tAA - ctrl->tCWL + tCCD + tWPRE + 2 + fast_clock;
++ }
++}
++
++static union tc_bank_rank_c_reg make_tc_bankrank_c(struct sysinfo *ctrl, uint8_t odt_stretch)
++{
++ const uint32_t tRDWR_sr = get_tRDWR_sr(ctrl);
++ const uint32_t tRDWR_drdd = tRDWR_sr + odt_stretch;
++
++ return (union tc_bank_rank_c_reg) {
++ .tXPDLL = get_tXPDLL(ctrl->mem_clock_mhz),
++ .tXP = MAX(ctrl->tXP, 7), /* Use a higher tXP for training */
++ .tAONPD = get_tAONPD(ctrl->mem_clock_mhz),
++ .tRDWR_sr = tRDWR_sr,
++ .tRDWR_dr = tRDWR_drdd,
++ .tRDWR_dd = tRDWR_drdd,
++ };
++}
++
++static union tc_bank_rank_d_reg make_tc_bankrank_d(struct sysinfo *ctrl, uint8_t odt_stretch)
++{
++ const uint32_t odt_rd_delay = ctrl->tAA - ctrl->tCWL;
++ if (!ctrl->lpddr) {
++ return (union tc_bank_rank_d_reg) {
++ .tAA = ctrl->tAA,
++ .tCWL = ctrl->tCWL,
++ .tCPDED = 1,
++ .tPRPDEN = 1,
++ .odt_read_delay = odt_rd_delay,
++ .odt_read_duration = odt_stretch,
++ };
++ }
++
++ /* tCWL has 1 extra clock because of tDQSS, subtract it here */
++ const uint32_t tCWL_lpddr = ctrl->tCWL - 1;
++ const uint32_t odt_wr_delay = tCWL_lpddr + DIV_ROUND_UP(3500, ctrl->qclkps * 2);
++ const uint32_t odt_wr_duration = DIV_ROUND_UP(3500 - 1750, ctrl->qclkps * 2) + 1;
++
++ return (union tc_bank_rank_d_reg) {
++ .tAA = ctrl->tAA,
++ .tCWL = tCWL_lpddr,
++ .tCPDED = 2, /* Required by JEDEC LPDDR3 spec */
++ .tPRPDEN = 1,
++ .odt_read_delay = odt_rd_delay,
++ .odt_read_duration = odt_stretch,
++ .odt_write_delay = odt_wr_delay,
++ .odt_write_duration = odt_wr_duration,
++ .odt_always_rank_0 = ctrl->lpddr_dram_odt
++ };
++}
++
++/* ZQCS period values, in (tREFI * 128) units */
++#define ZQCS_PERIOD_DDR3 128 /* tREFI * 128 = 7.8 us * 128 = 1ms */
++#define ZQCS_PERIOD_LPDDR3 256 /* tREFI * 128 = 3.9 us * 128 = 0.5ms */
++
++static uint32_t make_tc_zqcal(const struct sysinfo *const ctrl)
++{
++ const uint32_t zqcs_period = ctrl->lpddr ? ZQCS_PERIOD_LPDDR3 : ZQCS_PERIOD_DDR3;
++ const uint32_t tZQCS = get_tZQCS(ctrl->mem_clock_mhz, ctrl->lpddr);
++ return tZQCS << (is_hsw_ult() ? 10 : 8) | zqcs_period;
++}
++
++static union tc_rftp_reg make_tc_rftp(const struct sysinfo *const ctrl)
++{
++ /*
++ * The tREFIx9 field should be programmed to minimum of 8.9 * tREFI (to allow
++ * for possible delays from ZQ or isoc) and tRASmax (70us) divided by 1024.
++ */
++ return (union tc_rftp_reg) {
++ .tREFI = ctrl->tREFI,
++ .tRFC = ctrl->tRFC,
++ .tREFIx9 = ctrl->tREFI * 89 / 10240,
++ };
++}
++
++static union tc_srftp_reg make_tc_srftp(const struct sysinfo *const ctrl)
++{
++ return (union tc_srftp_reg) {
++ .tXSDLL = tDLLK,
++ .tXS_offset = get_tXS_offset(ctrl->mem_clock_mhz),
++ .tZQOPER = get_tZQOPER(ctrl->mem_clock_mhz, ctrl->lpddr),
++ .tMOD = get_tMOD(ctrl->mem_clock_mhz) - 8,
++ };
++}
++
+ void configure_timings(struct sysinfo *ctrl)
+ {
+- /** TODO: Stub **/
++ if (ctrl->lpddr)
++ die("%s: Missing support for LPDDR\n");
++
++ const uint8_t odt_stretch = get_odt_stretch(ctrl);
++ const union tc_bank_reg tc_bank = make_tc_bank(ctrl);
++ const union tc_bank_rank_a_reg tc_bank_rank_a = make_tc_bankrank_a(ctrl, odt_stretch);
++ const union tc_bank_rank_b_reg tc_bank_rank_b = make_tc_bankrank_b(ctrl);
++ const union tc_bank_rank_c_reg tc_bank_rank_c = make_tc_bankrank_c(ctrl, odt_stretch);
++ const union tc_bank_rank_d_reg tc_bank_rank_d = make_tc_bankrank_d(ctrl, odt_stretch);
++
++ const uint8_t wr_delay = tc_bank_rank_b.dec_wrd + 1;
++ uint8_t sc_wr_add_delay = 0;
++ sc_wr_add_delay |= wr_delay << 0;
++ sc_wr_add_delay |= wr_delay << 2;
++ sc_wr_add_delay |= wr_delay << 4;
++ sc_wr_add_delay |= wr_delay << 6;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ ctrl->tc_bank[channel] = tc_bank;
++ ctrl->tc_bankrank_a[channel] = tc_bank_rank_a;
++ ctrl->tc_bankrank_b[channel] = tc_bank_rank_b;
++ ctrl->tc_bankrank_c[channel] = tc_bank_rank_c;
++ ctrl->tc_bankrank_d[channel] = tc_bank_rank_d;
++
++ mchbar_write32(TC_BANK_ch(channel), ctrl->tc_bank[channel].raw);
++ mchbar_write32(TC_BANK_RANK_A_ch(channel), ctrl->tc_bankrank_a[channel].raw);
++ mchbar_write32(TC_BANK_RANK_B_ch(channel), ctrl->tc_bankrank_b[channel].raw);
++ mchbar_write32(TC_BANK_RANK_C_ch(channel), ctrl->tc_bankrank_c[channel].raw);
++ mchbar_write32(TC_BANK_RANK_D_ch(channel), ctrl->tc_bankrank_d[channel].raw);
++ mchbar_write8(SC_WR_ADD_DELAY_ch(channel), sc_wr_add_delay);
++ }
+ }
+
+ void configure_refresh(struct sysinfo *ctrl)
+ {
+- /** TODO: Stub **/
++ const union tc_srftp_reg tc_srftp = make_tc_srftp(ctrl);
++ const union tc_rftp_reg tc_rftp = make_tc_rftp(ctrl);
++ const uint32_t tc_zqcal = make_tc_zqcal(ctrl);
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ mchbar_setbits32(TC_RFP_ch(channel), 0xff);
++ mchbar_write32(TC_RFTP_ch(channel), tc_rftp.raw);
++ mchbar_write32(TC_SRFTP_ch(channel), tc_srftp.raw);
++ mchbar_write32(TC_ZQCAL_ch(channel), tc_zqcal);
++ }
+ }
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 4c3f399b5d..2acc5cbbc8 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -86,9 +86,21 @@
+ #define DDR_COMP_VSSHI_CONTROL 0x3a24
+
+ /* MCMAIN per-channel */
++#define TC_BANK_ch(ch) _MCMAIN_C(0x4000, ch)
++#define TC_BANK_RANK_A_ch(ch) _MCMAIN_C(0x4004, ch)
++#define TC_BANK_RANK_B_ch(ch) _MCMAIN_C(0x4008, ch)
++#define TC_BANK_RANK_C_ch(ch) _MCMAIN_C(0x400c, ch)
+ #define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
++#define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
++#define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+
++#define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
++
++#define TC_ZQCAL_ch(ch) _MCMAIN_C(0x4290, ch)
++#define TC_RFP_ch(ch) _MCMAIN_C(0x4294, ch)
++#define TC_RFTP_ch(ch) _MCMAIN_C(0x4298, ch)
+ #define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
++#define TC_SRFTP_ch(ch) _MCMAIN_C(0x42a4, ch)
+
+ /* MCMAIN broadcast */
+ #define MCSCHEDS_CBIT 0x4c20
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch b/config/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch
new file mode 100644
index 00000000..ad8527b2
--- /dev/null
+++ b/config/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch
@@ -0,0 +1,263 @@
+From 89ff35083af68d1b24c1633886202ecc153af67d Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 21:24:50 +0200
+Subject: [PATCH 15/26] haswell NRI: Program memory map
+
+This is very similar to Sandy/Ivy Bridge, except that there's several
+registers to program in GDXCBAR. One of these GDXCBAR registers has a
+lock bit that must be set in order for the memory controller to allow
+normal access to DRAM. And it took me four months to realize this one
+bit was the only reason why native raminit did not work.
+
+Change-Id: I3af73a018a7ba948701a542e661e7fefd57591fe
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../intel/haswell/native_raminit/memory_map.c | 183 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 1 +
+ .../intel/haswell/registers/host_bridge.h | 2 +
+ 5 files changed, 188 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/memory_map.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index fc55277a65..37d527e972 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -4,6 +4,7 @@ romstage-y += configure_mc.c
+ romstage-y += lookup_timings.c
+ romstage-y += init_mpll.c
+ romstage-y += io_comp_control.c
++romstage-y += memory_map.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+ romstage-y += spd_bitmunching.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/memory_map.c b/src/northbridge/intel/haswell/native_raminit/memory_map.c
+new file mode 100644
+index 0000000000..e3aded2b37
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/memory_map.c
+@@ -0,0 +1,183 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <southbridge/intel/lynxpoint/me.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++/* GDXCBAR */
++#define MPCOHTRK_GDXC_MOT_ADDRESS_LO 0x10
++#define MPCOHTRK_GDXC_MOT_ADDRESS_HI 0x14
++#define MPCOHTRK_GDXC_MOT_REGION 0x18
++
++#define MPCOHTRK_GDXC_OCLA_ADDRESS_LO 0x20
++#define MPCOHTRK_GDXC_OCLA_ADDRESS_HI 0x24
++#define MPCOHTRK_GDXC_OCLA_REGION 0x28
++
++/* This lock bit made me lose what little sanity I had left. - Angel Pons */
++#define MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK BIT(2)
++
++static inline uint32_t gdxcbar_read32(const uintptr_t offset)
++{
++ return read32p((mchbar_read32(GDXCBAR) & ~1) + offset);
++}
++
++static inline void gdxcbar_write32(const uintptr_t offset, const uint32_t value)
++{
++ write32p((mchbar_read32(GDXCBAR) & ~1) + offset, value);
++}
++
++static inline void gdxcbar_clrsetbits32(const uintptr_t offset, uint32_t clear, uint32_t set)
++{
++ const uintptr_t address = (mchbar_read32(GDXCBAR) & ~1) + offset;
++ clrsetbits32((void *)address, clear, set);
++}
++
++#define gdxcbar_setbits32(offset, set) gdxcbar_clrsetbits32(offset, 0, set)
++#define gdxcbar_clrbits32(offset, clear) gdxcbar_clrsetbits32(offset, clear, 0)
++
++/* All values stored in here (except the bool) are specified in MiB */
++struct memory_map_data {
++ uint32_t dpr_size;
++ uint32_t tseg_size;
++ uint32_t gtt_size;
++ uint32_t gms_size;
++ uint32_t me_stolen_size;
++ uint32_t mmio_size;
++ uint32_t touud;
++ uint32_t remaplimit;
++ uint32_t remapbase;
++ uint32_t tom;
++ uint32_t tom_minus_me;
++ uint32_t tolud;
++ uint32_t bdsm_base;
++ uint32_t gtt_base;
++ uint32_t tseg_base;
++ bool reclaim_possible;
++};
++
++static void compute_memory_map(struct memory_map_data *map)
++{
++ map->tom_minus_me = map->tom - map->me_stolen_size;
++
++ /*
++ * MMIO size will actually be slightly smaller than computed,
++ * but matches what MRC does and is more MTRR-friendly given
++ * that TSEG is treated as WB, but SMRR makes TSEG UC anyway.
++ */
++ const uint32_t mmio_size = MIN(map->tom_minus_me, 4096) / 2;
++ map->gtt_base = ALIGN_DOWN(mmio_size, map->tseg_size);
++ map->tseg_base = map->gtt_base - map->tseg_size;
++ map->bdsm_base = map->gtt_base + map->gtt_size;
++ map->tolud = map->bdsm_base + map->gms_size;
++ map->reclaim_possible = map->tom_minus_me > map->tolud;
++
++ if (map->reclaim_possible) {
++ map->remapbase = MAX(4096, map->tom_minus_me);
++ map->touud = MIN(4096, map->tom_minus_me) + map->remapbase - map->tolud;
++ map->remaplimit = map->touud - 1;
++ } else {
++ map->remapbase = 0;
++ map->remaplimit = 0;
++ map->touud = map->tom_minus_me;
++ }
++}
++
++static void display_memory_map(const struct memory_map_data *map)
++{
++ if (!CONFIG(DEBUG_RAM_SETUP))
++ return;
++
++ printk(BIOS_DEBUG, "============ MEMORY MAP ============\n");
++ printk(BIOS_DEBUG, "\n");
++ printk(BIOS_DEBUG, "dpr_size = %u MiB\n", map->dpr_size);
++ printk(BIOS_DEBUG, "tseg_size = %u MiB\n", map->tseg_size);
++ printk(BIOS_DEBUG, "gtt_size = %u MiB\n", map->gtt_size);
++ printk(BIOS_DEBUG, "gms_size = %u MiB\n", map->gms_size);
++ printk(BIOS_DEBUG, "me_stolen_size = %u MiB\n", map->me_stolen_size);
++ printk(BIOS_DEBUG, "\n");
++ printk(BIOS_DEBUG, "touud = %u MiB\n", map->touud);
++ printk(BIOS_DEBUG, "remaplimit = %u MiB\n", map->remaplimit);
++ printk(BIOS_DEBUG, "remapbase = %u MiB\n", map->remapbase);
++ printk(BIOS_DEBUG, "tom = %u MiB\n", map->tom);
++ printk(BIOS_DEBUG, "tom_minus_me = %u MiB\n", map->tom_minus_me);
++ printk(BIOS_DEBUG, "tolud = %u MiB\n", map->tolud);
++ printk(BIOS_DEBUG, "bdsm_base = %u MiB\n", map->bdsm_base);
++ printk(BIOS_DEBUG, "gtt_base = %u MiB\n", map->gtt_base);
++ printk(BIOS_DEBUG, "tseg_base = %u MiB\n", map->tseg_base);
++ printk(BIOS_DEBUG, "\n");
++ printk(BIOS_DEBUG, "reclaim_possible = %s\n", map->reclaim_possible ? "Yes" : "No");
++}
++
++static void map_write_reg64(const uint16_t reg, const uint64_t size)
++{
++ const uint64_t value = size << 20;
++ pci_write_config32(HOST_BRIDGE, reg + 4, value >> 32);
++ pci_write_config32(HOST_BRIDGE, reg + 0, value >> 0);
++}
++
++static void map_write_reg32(const uint16_t reg, const uint32_t size)
++{
++ const uint32_t value = size << 20;
++ pci_write_config32(HOST_BRIDGE, reg, value);
++}
++
++static void program_memory_map(const struct memory_map_data *map)
++{
++ map_write_reg64(TOUUD, map->touud);
++ map_write_reg64(TOM, map->tom);
++ if (map->reclaim_possible) {
++ map_write_reg64(REMAPBASE, map->remapbase);
++ map_write_reg64(REMAPLIMIT, map->remaplimit);
++ }
++ if (map->me_stolen_size) {
++ map_write_reg64(MESEG_LIMIT, 0x80000 - map->me_stolen_size);
++ map_write_reg64(MESEG_BASE, map->tom_minus_me);
++ pci_or_config32(HOST_BRIDGE, MESEG_LIMIT, ME_STLEN_EN);
++ }
++ map_write_reg32(TOLUD, map->tolud);
++ map_write_reg32(BDSM, map->bdsm_base);
++ map_write_reg32(BGSM, map->gtt_base);
++ map_write_reg32(TSEG, map->tseg_base);
++
++ const uint32_t dpr_reg = map->tseg_base << 20 | map->dpr_size << 4;
++ pci_write_config32(HOST_BRIDGE, DPR, dpr_reg);
++
++ const uint16_t gfx_stolen_size = GGC_IGD_MEM_IN_32MB_UNITS(map->gms_size / 32);
++ const uint16_t ggc = map->gtt_size << 8 | gfx_stolen_size;
++ pci_write_config16(HOST_BRIDGE, GGC, ggc);
++
++ /** TODO: Do not hardcode these? GDXC has weird alignment requirements, though. **/
++ gdxcbar_write32(MPCOHTRK_GDXC_MOT_ADDRESS_LO, 0);
++ gdxcbar_write32(MPCOHTRK_GDXC_MOT_ADDRESS_HI, 0);
++ gdxcbar_write32(MPCOHTRK_GDXC_MOT_REGION, 0);
++
++ gdxcbar_write32(MPCOHTRK_GDXC_OCLA_ADDRESS_LO, 0);
++ gdxcbar_write32(MPCOHTRK_GDXC_OCLA_ADDRESS_HI, 0);
++ gdxcbar_write32(MPCOHTRK_GDXC_OCLA_REGION, 0);
++
++ gdxcbar_setbits32(MPCOHTRK_GDXC_OCLA_ADDRESS_HI, MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK);
++}
++
++enum raminit_status configure_memory_map(struct sysinfo *ctrl)
++{
++ struct memory_map_data memory_map = {
++ .tom = ctrl->channel_size_mb[0] + ctrl->channel_size_mb[1],
++ .dpr_size = CONFIG_INTEL_TXT_DPR_SIZE,
++ .tseg_size = CONFIG_SMM_TSEG_SIZE >> 20,
++ .me_stolen_size = intel_early_me_uma_size(),
++ };
++ /** FIXME: MRC hardcodes iGPU parameters, but we should not **/
++ const bool igpu_on = pci_read_config32(HOST_BRIDGE, DEVEN) & DEVEN_D2EN;
++ if (CONFIG(ONBOARD_VGA_IS_PRIMARY) || igpu_on) {
++ memory_map.gtt_size = 2;
++ memory_map.gms_size = 64;
++ pci_or_config32(HOST_BRIDGE, DEVEN, DEVEN_D2EN);
++ }
++ compute_memory_map(&memory_map);
++ display_memory_map(&memory_map);
++ program_memory_map(&memory_map);
++ return 0;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 3a773cfa19..136a8ba989 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -24,6 +24,7 @@ static const struct task_entry cold_boot[] = {
+ { initialise_mpll, true, "INITMPLL", },
+ { convert_timings, true, "CONVTIM", },
+ { configure_mc, true, "CONFMC", },
++ { configure_memory_map, true, "MEMMAP", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index cd1f2eb2a5..4763b25e8d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -202,6 +202,7 @@ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
+ enum raminit_status initialise_mpll(struct sysinfo *ctrl);
+ enum raminit_status convert_timings(struct sysinfo *ctrl);
+ enum raminit_status configure_mc(struct sysinfo *ctrl);
++enum raminit_status configure_memory_map(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/registers/host_bridge.h b/src/northbridge/intel/haswell/registers/host_bridge.h
+index 1ee0ab2890..0228cf6bb9 100644
+--- a/src/northbridge/intel/haswell/registers/host_bridge.h
++++ b/src/northbridge/intel/haswell/registers/host_bridge.h
+@@ -34,6 +34,8 @@
+
+ #define MESEG_BASE 0x70 /* Management Engine Base */
+ #define MESEG_LIMIT 0x78 /* Management Engine Limit */
++#define MELCK (1 << 10) /* ME Range Lock */
++#define ME_STLEN_EN (1 << 11) /* ME Stolen Memory Enable */
+
+ #define PAM0 0x80
+ #define PAM1 0x81
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch b/config/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch
new file mode 100644
index 00000000..c321d239
--- /dev/null
+++ b/config/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch
@@ -0,0 +1,1038 @@
+From d24def01ec15f41a48331ef1e236270b2df90b84 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 21:49:40 +0200
+Subject: [PATCH 16/26] haswell NRI: Add DDR3 JEDEC reset and init
+
+Implement JEDEC reset and init sequence for DDR3. The MRS commands are
+issued through the REUT (Robust Electrical Unified Testing) hardware.
+
+Change-Id: I2a0c066537021b587599228086727cb1e041bff5
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 3 +
+ .../intel/haswell/native_raminit/ddr3.c | 217 ++++++++++++++++++
+ .../haswell/native_raminit/io_comp_control.c | 19 ++
+ .../haswell/native_raminit/jedec_reset.c | 120 ++++++++++
+ .../haswell/native_raminit/raminit_main.c | 2 +
+ .../haswell/native_raminit/raminit_native.h | 101 ++++++++
+ .../haswell/native_raminit/reg_structs.h | 154 +++++++++++++
+ .../intel/haswell/native_raminit/reut.c | 196 ++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 21 ++
+ src/southbridge/intel/lynxpoint/pch.h | 2 +
+ 10 files changed, 835 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/ddr3.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/jedec_reset.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/reut.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 37d527e972..e9212df9e6 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,11 +1,14 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
+ romstage-y += configure_mc.c
++romstage-y += ddr3.c
++romstage-y += jedec_reset.c
+ romstage-y += lookup_timings.c
+ romstage-y += init_mpll.c
+ romstage-y += io_comp_control.c
+ romstage-y += memory_map.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
++romstage-y += reut.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += timings_refresh.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/ddr3.c b/src/northbridge/intel/haswell/native_raminit/ddr3.c
+new file mode 100644
+index 0000000000..6ddb11488b
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/ddr3.c
+@@ -0,0 +1,217 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++#define DDR3_RTTNOM(a, b, c) (((a) << 9) | ((b) << 6) | ((c) << 2))
++
++uint16_t encode_ddr3_rttnom(const uint32_t rttnom)
++{
++ switch (rttnom) {
++ case 0: return DDR3_RTTNOM(0, 0, 0); /* RttNom is disabled */
++ case 20: return DDR3_RTTNOM(1, 0, 0); /* RZQ/12 */
++ case 30: return DDR3_RTTNOM(1, 0, 1); /* RZQ/8 */
++ case 40: return DDR3_RTTNOM(0, 1, 1); /* RZQ/6 */
++ case 60: return DDR3_RTTNOM(0, 0, 1); /* RZQ/4 */
++ case 120: return DDR3_RTTNOM(0, 1, 0); /* RZQ/2 */
++ }
++ printk(BIOS_ERR, "%s: Invalid rtt_nom value %u\n", __func__, rttnom);
++ return 0;
++}
++
++static const uint8_t jedec_wr_t[12] = { 1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 0, 0 };
++
++static void ddr3_program_mr0(struct sysinfo *ctrl, const uint8_t dll_reset)
++{
++ assert(ctrl->tWR >= 5 && ctrl->tWR <= 16);
++ assert(ctrl->tAA >= 4);
++ const uint8_t jedec_cas = ctrl->tAA - 4;
++ const union {
++ struct __packed {
++ uint16_t burst_length : 2; // Bits 1:0
++ uint16_t cas_latency_msb : 1; // Bits 2:2
++ uint16_t read_burst_type : 1; // Bits 3:3
++ uint16_t cas_latency_low : 3; // Bits 6:4
++ uint16_t test_mode : 1; // Bits 7:7
++ uint16_t dll_reset : 1; // Bits 8:8
++ uint16_t write_recovery : 3; // Bits 11:9
++ uint16_t precharge_pd_dll : 1; // Bits 12:12
++ uint16_t : 3; // Bits 15:13
++ };
++ uint16_t raw;
++ } mr0reg = {
++ .burst_length = 0,
++ .cas_latency_msb = !!(jedec_cas & BIT(3)),
++ .read_burst_type = 0,
++ .cas_latency_low = jedec_cas & 0x7,
++ .dll_reset = 1,
++ .write_recovery = jedec_wr_t[ctrl->tWR - 5],
++ .precharge_pd_dll = 0,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel))
++ continue;
++
++ if (!ctrl->restore_mrs)
++ ctrl->mr0[channel][slot] = mr0reg.raw;
++ }
++ reut_issue_mrs_all(ctrl, channel, 0, ctrl->mr0[channel]);
++ }
++}
++
++void ddr3_program_mr1(struct sysinfo *ctrl, const uint8_t wl_mode, const uint8_t q_off)
++{
++ /*
++ * JESD79-3F (JEDEC DDR3 spec) refers to bit 0 of MR1 as 'DLL Enable'.
++ * However, its encoding is weird, and 'DLL Disable' makes more sense.
++ *
++ * Moreover, bit 5 is part of ODIC (Output Driver Impedance Control),
++ * but all encodings where MR1 bit 5 is 1 are reserved. Thus, omit it.
++ */
++ union {
++ struct __packed {
++ uint16_t dll_disable : 1; // Bits 0:0
++ uint16_t od_impedance_ctl : 1; // Bits 1:1
++ uint16_t odt_rtt_nom_low : 1; // Bits 2:2
++ uint16_t additive_latency : 2; // Bits 4:3
++ uint16_t : 1; // Bits 5:5
++ uint16_t odt_rtt_nom_mid : 1; // Bits 6:6
++ uint16_t write_level_mode : 1; // Bits 7:7
++ uint16_t : 1; // Bits 8:8
++ uint16_t odt_rtt_nom_high : 1; // Bits 9:9
++ uint16_t : 1; // Bits 10:10
++ uint16_t t_dqs : 1; // Bits 11:11
++ uint16_t q_off : 1; // Bits 12:12
++ uint16_t : 3; // Bits 15:13
++ };
++ uint16_t raw;
++ } mr1reg = {
++ .dll_disable = 0,
++ .od_impedance_ctl = 1, /* RZQ/7 */
++ .additive_latency = 0,
++ .write_level_mode = wl_mode,
++ .t_dqs = 0,
++ .q_off = q_off,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ mr1reg.raw &= ~RTTNOM_MASK;
++ mr1reg.raw |= encode_ddr3_rttnom(ctrl->dpc[channel] == 2 ? 60 : 0);
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel))
++ continue;
++
++ if (!ctrl->restore_mrs)
++ ctrl->mr1[channel][slot] = mr1reg.raw;
++ }
++ reut_issue_mrs_all(ctrl, channel, 1, ctrl->mr1[channel]);
++ }
++}
++
++enum {
++ RTT_WR_OFF = 0,
++ RTT_WR_60 = 1,
++ RTT_WR_120 = 2,
++};
++
++static void ddr3_program_mr2(struct sysinfo *ctrl)
++{
++ assert(ctrl->tCWL >= 5);
++ const bool dimm_srt = ctrl->flags.ext_temp_refresh && !ctrl->flags.asr;
++
++ const union {
++ struct __packed {
++ uint16_t partial_array_sr : 3; // Bits 0:2
++ uint16_t cas_write_latency : 3; // Bits 5:3
++ uint16_t auto_self_refresh : 1; // Bits 6:6
++ uint16_t self_refresh_temp : 1; // Bits 7:7
++ uint16_t : 1; // Bits 8:8
++ uint16_t odt_rtt_wr : 2; // Bits 10:9
++ uint16_t : 5; // Bits 15:11
++ };
++ uint16_t raw;
++ } mr2reg = {
++ .partial_array_sr = 0,
++ .cas_write_latency = ctrl->tCWL - 5,
++ .auto_self_refresh = ctrl->flags.asr,
++ .self_refresh_temp = dimm_srt,
++ .odt_rtt_wr = is_hsw_ult() ? RTT_WR_120 : RTT_WR_60,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel))
++ continue;
++
++ if (!ctrl->restore_mrs)
++ ctrl->mr2[channel][slot] = mr2reg.raw;
++ }
++ /* MR2 shadow register is similar but not identical to MR2 */
++ if (!ctrl->restore_mrs) {
++ union tc_mr2_shadow_reg tc_mr2_shadow = {
++ .raw = mr2reg.raw & 0x073f,
++ };
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel))
++ continue;
++
++ if (dimm_srt)
++ tc_mr2_shadow.srt_available |= BIT(slot);
++
++ if (ctrl->rank_mirrored[channel] & BIT(slot + slot + 1))
++ tc_mr2_shadow.addr_bit_swizzle |= BIT(slot);
++ }
++ mchbar_write32(TC_MR2_SHADOW_ch(channel), tc_mr2_shadow.raw);
++ }
++ reut_issue_mrs_all(ctrl, channel, 2, ctrl->mr2[channel]);
++ }
++}
++
++static void ddr3_program_mr3(struct sysinfo *ctrl, const uint8_t mpr_mode)
++{
++ const union {
++ struct __packed {
++ uint16_t mpr_loc : 2; // Bits 1:0
++ uint16_t mpr_mode : 1; // Bits 2:2
++ uint16_t : 13; // Bits 15:3
++ };
++ uint16_t raw;
++ } mr3reg = {
++ .mpr_loc = 0,
++ .mpr_mode = mpr_mode,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel))
++ continue;
++
++ if (!ctrl->restore_mrs)
++ ctrl->mr3[channel][slot] = mr3reg.raw;
++ }
++ reut_issue_mrs_all(ctrl, channel, 3, ctrl->mr3[channel]);
++ }
++}
++
++enum raminit_status ddr3_jedec_init(struct sysinfo *ctrl)
++{
++ ddr3_program_mr2(ctrl);
++ ddr3_program_mr3(ctrl, 0);
++ ddr3_program_mr1(ctrl, 0, 0);
++ ddr3_program_mr0(ctrl, 1);
++ return reut_issue_zq(ctrl, ctrl->chanmap, ZQ_INIT);
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/io_comp_control.c b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+index 7e96c08938..ad8c848e57 100644
+--- a/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
++++ b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c
+@@ -8,6 +8,25 @@
+
+ #include "raminit_native.h"
+
++enum raminit_status io_reset(void)
++{
++ union mc_init_state_g_reg mc_init_state_g = {
++ .raw = mchbar_read32(MC_INIT_STATE_G),
++ };
++ mc_init_state_g.reset_io = 1;
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 2000);
++ do {
++ mc_init_state_g.raw = mchbar_read32(MC_INIT_STATE_G);
++ if (mc_init_state_g.reset_io == 0)
++ return RAMINIT_STATUS_SUCCESS;
++
++ } while (!stopwatch_expired(&timer));
++ printk(BIOS_ERR, "Timed out waiting for DDR I/O reset to complete\n");
++ return RAMINIT_STATUS_POLL_TIMEOUT;
++}
++
+ enum raminit_status wait_for_first_rcomp(void)
+ {
+ struct stopwatch timer;
+diff --git a/src/northbridge/intel/haswell/native_raminit/jedec_reset.c b/src/northbridge/intel/haswell/native_raminit/jedec_reset.c
+new file mode 100644
+index 0000000000..de0f676758
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/jedec_reset.c
+@@ -0,0 +1,120 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <southbridge/intel/lynxpoint/pch.h>
++#include <types.h>
++#include <timer.h>
++
++#include "raminit_native.h"
++
++static void assert_reset(const bool do_reset)
++{
++ if (is_hsw_ult()) {
++ uint32_t pm_cfg2 = RCBA32(PM_CFG2);
++ if (do_reset)
++ pm_cfg2 &= ~PM_CFG2_DRAM_RESET_CTL;
++ else
++ pm_cfg2 |= PM_CFG2_DRAM_RESET_CTL;
++ RCBA32(PM_CFG2) = pm_cfg2;
++ } else {
++ union mc_init_state_g_reg mc_init_state_g = {
++ .raw = mchbar_read32(MC_INIT_STATE_G),
++ };
++ mc_init_state_g.ddr_not_reset = !do_reset;
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
++ }
++}
++
++/*
++ * Perform JEDEC reset.
++ *
++ * If RTT_NOM is to be enabled in MR1, the ODT input signal must be
++ * statically held low in our system since RTT_NOM is always enabled.
++ */
++static void jedec_reset(struct sysinfo *ctrl)
++{
++ if (is_hsw_ult())
++ assert_reset(false);
++
++ union mc_init_state_g_reg mc_init_state_g = {
++ .ddr_not_reset = 1,
++ .safe_self_refresh = 1,
++ };
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
++
++ union reut_misc_cke_ctrl_reg reut_misc_cke_ctrl = {
++ .cke_override = 0xf,
++ .cke_on = 0,
++ };
++ mchbar_write32(REUT_MISC_CKE_CTRL, reut_misc_cke_ctrl.raw);
++
++ assert_reset(true);
++
++ /** TODO: check and switch DDR3 voltage here (mainboard-specific) **/
++
++ udelay(200);
++
++ assert_reset(false);
++
++ udelay(500);
++
++ mc_init_state_g.dclk_enable = 1;
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
++
++ /* Delay at least 20 nanoseconds for tCKSRX */
++ tick_delay(1);
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ reut_misc_cke_ctrl.cke_on = ctrl->rankmap[channel];
++ mchbar_write32(REUT_ch_MISC_CKE_CTRL(channel), reut_misc_cke_ctrl.raw);
++ }
++
++ /*
++ * Wait minimum of reset CKE exit time, tXPR.
++ * Spec says MAX(tXS, 5 tCK). 5 tCK is 10 ns.
++ */
++ tick_delay(1);
++}
++
++enum raminit_status do_jedec_init(struct sysinfo *ctrl)
++{
++ /* Never do a JEDEC reset in S3 resume */
++ if (ctrl->bootmode == BOOTMODE_S3)
++ return RAMINIT_STATUS_SUCCESS;
++
++ enum raminit_status status = io_reset();
++ if (status)
++ return status;
++
++ status = wait_for_first_rcomp();
++ if (status)
++ return status;
++
++ /* Force ODT low (JEDEC spec) */
++ const union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
++ .odt_override = 0xf,
++ .odt_on = 0,
++ };
++ mchbar_write32(REUT_MISC_ODT_CTRL, reut_misc_odt_ctrl.raw);
++
++ /*
++ * Note: Haswell MRC does not clear ODT override for LPDDR3. However,
++ * Broadwell MRC does. Hell suspects this difference is important, as
++ * there is an erratum in the specification update for Broadwell:
++ *
++ * Erratum BDM74: LPDDR3 Memory Training May Cause Platform Boot Failure
++ */
++ if (ctrl->lpddr)
++ die("%s: LPDDR-specific JEDEC init not implemented\n", __func__);
++
++ jedec_reset(ctrl);
++ status = ddr3_jedec_init(ctrl);
++ if (!status)
++ ctrl->restore_mrs = true;
++
++ /* Release ODT override */
++ mchbar_write32(REUT_MISC_ODT_CTRL, 0);
++ return status;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 136a8ba989..73ff180b8c 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -25,6 +25,7 @@ static const struct task_entry cold_boot[] = {
+ { convert_timings, true, "CONVTIM", },
+ { configure_mc, true, "CONFMC", },
+ { configure_memory_map, true, "MEMMAP", },
++ { do_jedec_init, true, "JEDECINIT", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+@@ -58,6 +59,7 @@ static void initialize_ctrl(struct sysinfo *ctrl)
+ ctrl->stepping = get_stepping(ctrl->cpu);
+ ctrl->vdd_mv = is_hsw_ult() ? 1350 : 1500; /** FIXME: Hardcoded, does it matter? **/
+ ctrl->dq_pins_interleaved = cfg->dq_pins_interleaved;
++ ctrl->restore_mrs = false;
+ ctrl->bootmode = bootmode;
+ }
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 4763b25e8d..e3cf4254a0 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -27,6 +27,30 @@
+ /* Always use 12 legs for emphasis (not trained) */
+ #define TXEQFULLDRV (3 << 4)
+
++/* DDR3 mode register bits */
++#define MR0_DLL_RESET BIT(8)
++
++#define MR1_WL_ENABLE BIT(7)
++#define MR1_QOFF_ENABLE BIT(12) /* If set, output buffers disabled */
++
++#define RTTNOM_MASK (BIT(9) | BIT(6) | BIT(2))
++
++/* ZQ calibration types */
++enum {
++ ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
++ ZQ_LONG, /* DDR3: ZQCL with tZQoper, LPDDR3: ZQ Long with tZQCL */
++ ZQ_SHORT, /* DDR3: ZQCS with tZQCS, LPDDR3: ZQ Short with tZQCS */
++ ZQ_RESET, /* DDR3: not used, LPDDR3: ZQ Reset with tZQreset */
++};
++
++/* REUT initialisation modes */
++enum {
++ REUT_MODE_IDLE = 0,
++ REUT_MODE_TEST = 1,
++ REUT_MODE_MRS = 2,
++ REUT_MODE_NOP = 3, /* Normal operation mode */
++};
++
+ enum command_training_iteration {
+ CT_ITERATION_CLOCK = 0,
+ CT_ITERATION_CMD_NORTH,
+@@ -50,6 +74,7 @@ enum raminit_status {
+ RAMINIT_STATUS_UNSUPPORTED_MEMORY,
+ RAMINIT_STATUS_MPLL_INIT_FAILURE,
+ RAMINIT_STATUS_POLL_TIMEOUT,
++ RAMINIT_STATUS_REUT_ERROR,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -72,6 +97,7 @@ struct sysinfo {
+ uint32_t cpu; /* CPUID value */
+
+ bool dq_pins_interleaved;
++ bool restore_mrs;
+
+ /** TODO: ECC support untested **/
+ bool is_ecc;
+@@ -161,6 +187,11 @@ struct sysinfo {
+ union tc_bank_rank_b_reg tc_bankrank_b[NUM_CHANNELS];
+ union tc_bank_rank_c_reg tc_bankrank_c[NUM_CHANNELS];
+ union tc_bank_rank_d_reg tc_bankrank_d[NUM_CHANNELS];
++
++ uint16_t mr0[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint16_t mr1[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint16_t mr2[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint16_t mr3[NUM_CHANNELS][NUM_SLOTRANKS];
+ };
+
+ static inline bool is_hsw_ult(void)
+@@ -196,6 +227,55 @@ static inline void clear_data_offset_train_all(struct sysinfo *ctrl)
+ memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train));
+ }
+
++/* Number of ticks to wait in units of 69.841279 ns (citation needed) */
++static inline void tick_delay(const uint32_t delay)
++{
++ volatile uint32_t junk;
++
++ /* Just perform reads to a random register */
++ for (uint32_t start = 0; start <= delay; start++)
++ junk = mchbar_read32(REUT_ERR_DATA_STATUS);
++}
++
++/*
++ * 64-bit MCHBAR registers need to be accessed atomically. If one uses
++ * two 32-bit ops instead, there will be problems with the REUT's CADB
++ * (Command Address Data Buffer): hardware automatically advances the
++ * pointer into the register file after a write to the input register.
++ */
++static inline uint64_t mchbar_read64(const uintptr_t x)
++{
++ const uint64_t *offset = (uint64_t *)(CONFIG_FIXED_MCHBAR_MMIO_BASE + x);
++ uint64_t mmxsave, v;
++ asm volatile (
++ "\n\t movq %%mm0, %0"
++ "\n\t movq %2, %%mm0"
++ "\n\t movq %%mm0, %1"
++ "\n\t movq %3, %%mm0"
++ "\n\t emms"
++ : "=m"(mmxsave),
++ "=m"(v)
++ : "m"(offset[0]),
++ "m"(mmxsave));
++ return v;
++}
++
++static inline void mchbar_write64(const uintptr_t x, const uint64_t v)
++{
++ const uint64_t *offset = (uint64_t *)(CONFIG_FIXED_MCHBAR_MMIO_BASE + x);
++ uint64_t mmxsave;
++ asm volatile (
++ "\n\t movq %%mm0, %0"
++ "\n\t movq %2, %%mm0"
++ "\n\t movq %%mm0, %1"
++ "\n\t movq %3, %%mm0"
++ "\n\t emms"
++ : "=m"(mmxsave)
++ : "m"(offset[0]),
++ "m"(v),
++ "m"(mmxsave));
++}
++
+ void raminit_main(enum raminit_boot_mode bootmode);
+
+ enum raminit_status collect_spd_info(struct sysinfo *ctrl);
+@@ -203,6 +283,7 @@ enum raminit_status initialise_mpll(struct sysinfo *ctrl);
+ enum raminit_status convert_timings(struct sysinfo *ctrl);
+ enum raminit_status configure_mc(struct sysinfo *ctrl);
+ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
++enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+@@ -215,8 +296,28 @@ uint32_t get_tXS_offset(uint32_t mem_clock_mhz);
+ uint32_t get_tZQOPER(uint32_t mem_clock_mhz, bool lpddr);
+ uint32_t get_tZQCS(uint32_t mem_clock_mhz, bool lpddr);
+
++enum raminit_status io_reset(void);
+ enum raminit_status wait_for_first_rcomp(void);
+
++uint16_t encode_ddr3_rttnom(uint32_t rttnom);
++void ddr3_program_mr1(struct sysinfo *ctrl, uint8_t wl_mode, uint8_t q_off);
++enum raminit_status ddr3_jedec_init(struct sysinfo *ctrl);
++
++void reut_issue_mrs(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ uint8_t rankmask,
++ uint8_t mr,
++ uint16_t val);
++
++void reut_issue_mrs_all(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ uint8_t mr,
++ const uint16_t val[NUM_SLOTS]);
++
++enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index 70487e1640..9929f617fe 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -335,6 +335,127 @@ union mcscheds_cbit_reg {
+ uint32_t raw;
+ };
+
++union reut_pat_cadb_prog_reg {
++ struct __packed {
++ uint32_t addr : 16; // Bits 15:0
++ uint32_t : 8; // Bits 23:16
++ uint32_t bank : 3; // Bits 26:24
++ uint32_t : 5; // Bits 31:27
++ uint32_t cs : 4; // Bits 35:32
++ uint32_t : 4; // Bits 39:36
++ uint32_t cmd : 3; // Bits 42:40
++ uint32_t : 5; // Bits 47:43
++ uint32_t odt : 4; // Bits 51:48
++ uint32_t : 4; // Bits 55:52
++ uint32_t cke : 4; // Bits 59:56
++ uint32_t : 4; // Bits 63:60
++ };
++ uint64_t raw;
++ uint32_t raw32[2];
++};
++
++union reut_pat_cadb_mrs_reg {
++ struct __packed {
++ uint32_t delay_gap : 3; // Bits 2:0
++ uint32_t : 5; // Bits 7:3
++ uint32_t start_ptr : 3; // Bits 10:8
++ uint32_t : 5; // Bits 15:11
++ uint32_t end_ptr : 3; // Bits 18:16
++ uint32_t : 5; // Bits 23:19
++ uint32_t curr_ptr : 3; // Bits 26:24
++ uint32_t : 5; // Bits 31:27
++ };
++ uint32_t raw;
++};
++
++union reut_seq_cfg_reg {
++ struct __packed {
++ uint32_t : 3; // Bits 2:0
++ uint32_t stop_base_seq_on_wrap_trigger : 1; // Bits 3:3
++ uint32_t : 1; // Bits 4:4
++ uint32_t address_update_rate_mode : 1; // Bits 5:5
++ uint32_t : 1; // Bits 6:6
++ uint32_t enable_dummy_reads : 1; // Bits 7:7
++ uint32_t : 2; // Bits 9:8
++ uint32_t enable_constant_write_strobe : 1; // Bits 10:10
++ uint32_t global_control : 1; // Bits 11:11
++ uint32_t initialization_mode : 2; // Bits 13:12
++ uint32_t : 2; // Bits 15:14
++ uint32_t early_steppings_loop_count : 5; // Bits 20:16 *** Not on C0 ***
++ uint32_t : 3; // Bits 23:21
++ uint32_t subsequence_start_pointer : 3; // Bits 26:24
++ uint32_t : 1; // Bits 27:27
++ uint32_t subsequence_end_pointer : 3; // Bits 30:28
++ uint32_t : 1; // Bits 31:31
++ uint32_t start_test_delay : 10; // Bits 41:32
++ uint32_t : 22; // Bits 63:42
++ };
++ uint64_t raw;
++ uint32_t raw32[2];
++};
++
++union reut_seq_ctl_reg {
++ struct __packed {
++ uint32_t start_test : 1; // Bits 0:0
++ uint32_t stop_test : 1; // Bits 1:1
++ uint32_t clear_errors : 1; // Bits 2:2
++ uint32_t : 1; // Bits 3:3
++ uint32_t stop_on_error : 1; // Bits 4:4
++ uint32_t : 27; // Bits 31:5
++ };
++ uint32_t raw;
++};
++
++union reut_global_err_reg {
++ struct __packed {
++ uint32_t ch_error : 2; // Bits 1:0
++ uint32_t : 14; // Bits 15:2
++ uint32_t ch_test_done : 2; // Bits 17:16
++ uint32_t : 14; // Bits 31:18
++ };
++ uint32_t raw;
++};
++
++union reut_misc_cke_ctrl_reg {
++ struct __packed {
++ uint32_t cke_override : 4; // Bits 3:0
++ uint32_t : 4; // Bits 7:4
++ uint32_t cke_en_start_test_sync : 1; // Bits 8:8
++ uint32_t : 7; // Bits 15:9
++ uint32_t cke_on : 4; // Bits 19:16
++ uint32_t : 12; // Bits 31:20
++ };
++ uint32_t raw;
++};
++
++union reut_misc_odt_ctrl_reg {
++ struct __packed {
++ uint32_t odt_override : 4; // Bits 3:0
++ uint32_t : 12; // Bits 15:4
++ uint32_t odt_on : 4; // Bits 19:16
++ uint32_t : 11; // Bits 30:20
++ uint32_t mpr_train_ddr_on : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union mcscheds_dft_misc_reg {
++ struct __packed {
++ uint32_t wdar : 1; // Bits 0:0
++ uint32_t safe_mask_sel : 3; // Bits 3:1
++ uint32_t force_rcv_en : 1; // Bits 4:4
++ uint32_t : 3; // Bits 7:5
++ uint32_t ddr_qualifier : 2; // Bits 9:8
++ uint32_t qualifier_length : 2; // Bits 11:10
++ uint32_t wdb_block_en : 1; // Bits 12:12
++ uint32_t rt_dft_read_ptr : 4; // Bits 16:13
++ uint32_t rt_dft_read_enable : 1; // Bits 17:17
++ uint32_t rt_dft_read_sel_addr : 1; // Bits 18:18
++ uint32_t : 13; // Bits 31:19
++ };
++ uint32_t raw;
++};
++
+ union tc_bank_reg {
+ struct __packed {
+ uint32_t tRCD : 5; // Bits 4:0
+@@ -428,6 +549,18 @@ union tc_srftp_reg {
+ uint32_t raw;
+ };
+
++union tc_mr2_shadow_reg {
++ struct __packed {
++ uint32_t mr2_shadow_low : 6; // Bits 5:0
++ uint32_t srt_available : 2; // Bits 7:6
++ uint32_t mr2_shadow_high : 3; // Bits 10:8
++ uint32_t : 3; // Bits 13:11
++ uint32_t addr_bit_swizzle : 2; // Bits 15:14
++ uint32_t : 16; // Bits 31:16
++ };
++ uint32_t raw;
++};
++
+ union mcmain_command_rate_limit_reg {
+ struct __packed {
+ uint32_t enable_cmd_limit : 1; // Bits 0:0
+@@ -483,6 +616,27 @@ union mad_zr_reg {
+ uint32_t raw;
+ };
+
++union mc_init_state_g_reg {
++ struct __packed {
++ uint32_t pu_mrc_done : 1; // Bits 0:0
++ uint32_t ddr_not_reset : 1; // Bits 1:1
++ uint32_t : 1; // Bits 2:2
++ uint32_t refresh_enable : 1; // Bits 3:3
++ uint32_t : 1; // Bits 4:4
++ uint32_t mc_init_done_ack : 1; // Bits 5:5
++ uint32_t : 1; // Bits 6:6
++ uint32_t mrc_done : 1; // Bits 7:7
++ uint32_t safe_self_refresh : 1; // Bits 8:8
++ uint32_t : 1; // Bits 9:9
++ uint32_t hvm_gate_ddr_reset : 1; // Bits 10:10
++ uint32_t : 11; // Bits 21:11
++ uint32_t dclk_enable : 1; // Bits 22:22
++ uint32_t reset_io : 1; // Bits 23:23
++ uint32_t : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
+ /* Same definition for P_COMP, M_COMP, D_COMP */
+ union pcu_comp_reg {
+ struct __packed {
+diff --git a/src/northbridge/intel/haswell/native_raminit/reut.c b/src/northbridge/intel/haswell/native_raminit/reut.c
+new file mode 100644
+index 0000000000..c55cdd9c7e
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/reut.c
+@@ -0,0 +1,196 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++enum {
++ CADB_CMD_MRS = 0,
++ CADB_CMD_REF = 1,
++ CADB_CMD_PRE = 2,
++ CADB_CMD_ACT = 3,
++ CADB_CMD_WR = 4,
++ CADB_CMD_RD = 5,
++ CADB_CMD_ZQ = 6,
++ CADB_CMD_NOP = 7,
++};
++
++/*
++ * DDR3 rank mirror swaps the following pins: A3<->A4, A5<->A6, A7<->A8, BA0<->BA1
++ *
++ * Note that the swapped bits are contiguous. We can use some XOR magic to swap the bits.
++ * Address lanes are at bits 0..15 and bank selects are at bits 24..26 on the REUT register.
++ */
++#define MIRROR_BITS (BIT(24) | BIT(7) | BIT(5) | BIT(3))
++static uint64_t cadb_prog_rank_mirror(const uint64_t cadb_prog)
++{
++ /* First XOR: find which pairs of bits are different (need swapping) */
++ const uint64_t tmp64 = (cadb_prog ^ (cadb_prog >> 1)) & MIRROR_BITS;
++
++ /* Second XOR: invert the pairs of bits that have different values */
++ return cadb_prog ^ (tmp64 | tmp64 << 1);
++}
++
++static enum raminit_status reut_write_cadb_cmd(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rankmask,
++ const uint8_t cmd,
++ const uint8_t bank,
++ const uint16_t valarr[NUM_SLOTRANKS],
++ const uint8_t delay)
++{
++ union mcscheds_dft_misc_reg dft_misc = {
++ .raw = mchbar_read32(MCSCHEDS_DFT_MISC),
++ };
++ dft_misc.ddr_qualifier = 0;
++ mchbar_write32(MCSCHEDS_DFT_MISC, dft_misc.raw);
++
++ /* Pointer will be dynamically incremented after a write to CADB_PROG register */
++ mchbar_write8(REUT_ch_PAT_CADB_WRITE_PTR(channel), 0);
++
++ uint8_t count = 0;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!(ctrl->rankmap[channel] & BIT(rank) & rankmask))
++ continue;
++
++ union reut_pat_cadb_prog_reg reut_cadb_prog = {
++ .addr = valarr[rank],
++ .bank = bank,
++ .cs = ~BIT(rank), /* CS is active low */
++ .cmd = cmd,
++ .cke = 0xf,
++ };
++ if (ctrl->rank_mirrored[channel] & BIT(rank))
++ reut_cadb_prog.raw = cadb_prog_rank_mirror(reut_cadb_prog.raw);
++
++ mchbar_write64(REUT_ch_PAT_CADB_PROG(channel), reut_cadb_prog.raw);
++ count++;
++ }
++ if (!count) {
++ printk(BIOS_ERR, "%s: rankmask is invalid\n", __func__);
++ return RAMINIT_STATUS_UNSPECIFIED_ERROR; /** FIXME: Is this needed? **/
++ }
++ const union reut_pat_cadb_mrs_reg reut_cadb_mrs = {
++ .delay_gap = delay ? delay : 3,
++ .end_ptr = count - 1,
++ };
++ mchbar_write32(REUT_ch_PAT_CADB_MRS(channel), reut_cadb_mrs.raw);
++
++ const uint32_t reut_seq_cfg_save = mchbar_read32(REUT_ch_SEQ_CFG(channel));
++ union reut_seq_cfg_reg reut_seq_cfg = {
++ .raw = reut_seq_cfg_save,
++ };
++ reut_seq_cfg.global_control = 0;
++ reut_seq_cfg.initialization_mode = REUT_MODE_MRS;
++ mchbar_write32(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ mchbar_write32(REUT_ch_SEQ_CTL(channel), (union reut_seq_ctl_reg) {
++ .start_test = 1,
++ .clear_errors = 1,
++ }.raw);
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ union reut_global_err_reg reut_global_err;
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 100);
++ do {
++ reut_global_err.raw = mchbar_read32(REUT_GLOBAL_ERR);
++ if (reut_global_err.ch_error & BIT(channel)) {
++ printk(BIOS_ERR, "Unexpected REUT error for channel %u\n", channel);
++ status = RAMINIT_STATUS_REUT_ERROR;
++ break;
++ }
++ if (stopwatch_expired(&timer)) {
++ printk(BIOS_ERR, "%s: REUT timed out!\n", __func__);
++ status = RAMINIT_STATUS_POLL_TIMEOUT;
++ break;
++ }
++ } while (!(reut_global_err.ch_test_done & BIT(channel)));
++ mchbar_write32(REUT_ch_SEQ_CTL(channel), (union reut_seq_ctl_reg) {
++ .clear_errors = 1,
++ }.raw);
++ mchbar_write32(REUT_ch_SEQ_CFG(channel), reut_seq_cfg_save);
++ return status;
++}
++
++static enum raminit_status reut_write_cadb_cmd_all(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rankmask,
++ const uint8_t cmd,
++ const uint8_t bank,
++ const uint16_t val,
++ const uint8_t delay)
++{
++ const uint16_t valarr[NUM_SLOTRANKS] = { val, val, val, val };
++ return reut_write_cadb_cmd(ctrl, channel, rankmask, cmd, bank, valarr, delay);
++}
++
++void reut_issue_mrs(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rankmask,
++ const uint8_t mr,
++ const uint16_t val)
++{
++ reut_write_cadb_cmd_all(ctrl, channel, rankmask, CADB_CMD_MRS, mr, val, 0);
++}
++
++void reut_issue_mrs_all(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t mr,
++ const uint16_t val[NUM_SLOTS])
++{
++ const uint16_t valarr[NUM_SLOTRANKS] = { val[0], val[0], val[1], val[1] };
++ reut_write_cadb_cmd(ctrl, channel, 0xf, CADB_CMD_MRS, mr, valarr, 0);
++}
++
++enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type)
++{
++ /** TODO: Issuing ZQ commands differs for LPDDR **/
++ if (ctrl->lpddr)
++ die("%s: LPDDR not yet supported in ZQ calibration\n");
++
++ uint8_t opcode; /* NOTE: Only used for LPDDR */
++ uint16_t zq = 0;
++ switch (zq_type) {
++ case ZQ_INIT:
++ zq = BIT(10);
++ opcode = 0xff;
++ break;
++ case ZQ_LONG:
++ zq = BIT(10);
++ opcode = 0xab;
++ break;
++ case ZQ_SHORT:
++ opcode = 0x56;
++ break;
++ case ZQ_RESET:
++ opcode = 0xc3;
++ break;
++ default:
++ die("%s: ZQ type %u is invalid\n", zq_type);
++ }
++
++ /* ZQCS on single-channel needs a longer delay */
++ const uint8_t delay = zq_type == ZQ_SHORT && (!ctrl->dpc[0] || !ctrl->dpc[1]) ? 7 : 1;
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!(BIT(channel) & chanmask) || !does_ch_exist(ctrl, channel))
++ continue;
++
++ status = reut_write_cadb_cmd_all(ctrl, channel, 0xf, CADB_CMD_ZQ, 0, zq, delay);
++ if (status)
++ break;
++ }
++
++ /* Wait a bit after ZQ INIT and ZQCL commands */
++ if (zq)
++ udelay(1);
++
++ return status;
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 2acc5cbbc8..4fc78a7f43 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -96,15 +96,36 @@
+
+ #define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
+
++#define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
++
++#define REUT_ch_PAT_CADB_MRS(ch) _MCMAIN_C(0x419c, ch)
++
++#define REUT_ch_PAT_CADB_WRITE_PTR(ch) _MCMAIN_C(0x41bc, ch)
++#define REUT_ch_PAT_CADB_PROG(ch) _MCMAIN_C(0x41c0, ch)
++
+ #define TC_ZQCAL_ch(ch) _MCMAIN_C(0x4290, ch)
+ #define TC_RFP_ch(ch) _MCMAIN_C(0x4294, ch)
+ #define TC_RFTP_ch(ch) _MCMAIN_C(0x4298, ch)
++#define TC_MR2_SHADOW_ch(ch) _MCMAIN_C(0x429c, ch)
+ #define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
+ #define TC_SRFTP_ch(ch) _MCMAIN_C(0x42a4, ch)
+
++#define REUT_GLOBAL_ERR 0x4804
++
++#define REUT_ch_SEQ_CFG(ch) (0x48a8 + 8 * (ch))
++
++#define REUT_ch_SEQ_CTL(ch) (0x48b8 + 4 * (ch))
++
+ /* MCMAIN broadcast */
+ #define MCSCHEDS_CBIT 0x4c20
+
++#define MCSCHEDS_DFT_MISC 0x4c30
++
++#define REUT_ERR_DATA_STATUS 0x4ce0
++
++#define REUT_MISC_CKE_CTRL 0x4d90
++#define REUT_MISC_ODT_CTRL 0x4d94
++
+ #define MCMNTS_SC_WDBWM 0x4f8c
+
+ /* MCDECS */
+diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h
+index 74b4d50017..16bef5032a 100644
+--- a/src/southbridge/intel/lynxpoint/pch.h
++++ b/src/southbridge/intel/lynxpoint/pch.h
+@@ -586,6 +586,8 @@ void mainboard_config_rcba(void);
+ #define ACPIIRQEN 0x31e0 /* 32bit */
+ #define OIC 0x31fe /* 16bit */
+ #define PRSTS 0x3310 /* 32bit */
++#define PM_CFG2 0x333c /* 32bit */
++#define PM_CFG2_DRAM_RESET_CTL (1 << 26) /* ULT only */
+ #define PMSYNC_CONFIG 0x33c4 /* 32bit */
+ #define PMSYNC_CONFIG2 0x33cc /* 32bit */
+ #define SOFT_RESET_CTRL 0x38f4
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch b/config/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch
new file mode 100644
index 00000000..e4cea123
--- /dev/null
+++ b/config/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch
@@ -0,0 +1,384 @@
+From 42e43eb210bbb172af8e5ad064326c4570be8654 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sat, 7 May 2022 23:12:18 +0200
+Subject: [PATCH 17/26] haswell NRI: Add pre-training steps
+
+Implement pre-training steps, which consist of enabling ECC I/O and
+filling the WDB (Write Data Buffer, stores test patterns) through a
+magic LDAT port.
+
+Change-Id: Ie2e09e3b218c4569ed8de5c5e1b05d491032e0f1
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 34 ++++
+ .../haswell/native_raminit/raminit_native.h | 24 +++
+ .../haswell/native_raminit/reg_structs.h | 45 +++++
+ .../intel/haswell/native_raminit/setup_wdb.c | 159 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 9 +
+ 6 files changed, 272 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/setup_wdb.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index e9212df9e6..8d7d4e4db0 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -10,5 +10,6 @@ romstage-y += memory_map.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
+ romstage-y += reut.c
++romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += timings_refresh.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 73ff180b8c..5e4674957d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -13,6 +13,39 @@
+
+ #include "raminit_native.h"
+
++static enum raminit_status pre_training(struct sysinfo *ctrl)
++{
++ /* Skip on S3 resume */
++ if (ctrl->bootmode == BOOTMODE_S3)
++ return RAMINIT_STATUS_SUCCESS;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
++ if (!rank_in_ch(ctrl, slot + slot, channel))
++ continue;
++
++ printk(RAM_DEBUG, "C%uS%u:\n", channel, slot);
++ printk(RAM_DEBUG, "\tMR0: 0x%04x\n", ctrl->mr0[channel][slot]);
++ printk(RAM_DEBUG, "\tMR1: 0x%04x\n", ctrl->mr1[channel][slot]);
++ printk(RAM_DEBUG, "\tMR2: 0x%04x\n", ctrl->mr2[channel][slot]);
++ printk(RAM_DEBUG, "\tMR3: 0x%04x\n", ctrl->mr3[channel][slot]);
++ printk(RAM_DEBUG, "\n");
++ }
++ if (ctrl->is_ecc) {
++ union mad_dimm_reg mad_dimm = {
++ .raw = mchbar_read32(MAD_DIMM(channel)),
++ };
++ /* Enable ECC I/O */
++ mad_dimm.ecc_mode = 1;
++ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
++ /* Wait 4 usec after enabling the ECC I/O, needed by HW */
++ udelay(4);
++ }
++ }
++ setup_wdb(ctrl);
++ return RAMINIT_STATUS_SUCCESS;
++}
++
+ struct task_entry {
+ enum raminit_status (*task)(struct sysinfo *);
+ bool is_enabled;
+@@ -26,6 +59,7 @@ static const struct task_entry cold_boot[] = {
+ { configure_mc, true, "CONFMC", },
+ { configure_memory_map, true, "MEMMAP", },
+ { do_jedec_init, true, "JEDECINIT", },
++ { pre_training, true, "PRETRAIN", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index e3cf4254a0..f29c2ec366 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -35,6 +35,13 @@
+
+ #define RTTNOM_MASK (BIT(9) | BIT(6) | BIT(2))
+
++#define BASIC_VA_PAT_SPREAD_8 0x01010101
++
++#define WDB_CACHE_LINE_SIZE 8
++
++#define NUM_WDB_CL_MUX_SEEDS 3
++#define NUM_CADB_MUX_SEEDS 3
++
+ /* ZQ calibration types */
+ enum {
+ ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
+@@ -318,6 +325,23 @@ void reut_issue_mrs_all(
+
+ enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type);
+
++void write_wdb_fixed_pat(
++ const struct sysinfo *ctrl,
++ const uint8_t patterns[],
++ const uint8_t pat_mask[],
++ uint8_t spread,
++ uint16_t start);
++
++void write_wdb_va_pat(
++ const struct sysinfo *ctrl,
++ uint32_t agg_mask,
++ uint32_t vic_mask,
++ uint8_t vic_rot,
++ uint16_t start);
++
++void program_wdb_lfsr(const struct sysinfo *ctrl, bool cleanup);
++void setup_wdb(const struct sysinfo *ctrl);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index 9929f617fe..7aa8d8c8b2 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -335,6 +335,18 @@ union mcscheds_cbit_reg {
+ uint32_t raw;
+ };
+
++union reut_pat_cl_mux_lmn_reg {
++ struct __packed {
++ uint32_t l_data_select : 1; // Bits 0:0
++ uint32_t en_sweep_freq : 1; // Bits 1:1
++ uint32_t : 6; // Bits 7:2
++ uint32_t l_counter : 8; // Bits 15:8
++ uint32_t m_counter : 8; // Bits 23:16
++ uint32_t n_counter : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
+ union reut_pat_cadb_prog_reg {
+ struct __packed {
+ uint32_t addr : 16; // Bits 15:0
+@@ -439,6 +451,39 @@ union reut_misc_odt_ctrl_reg {
+ uint32_t raw;
+ };
+
++union ldat_pdat_reg {
++ struct __packed {
++ uint32_t fast_addr : 12; // Bits 11:0
++ uint32_t : 4; // Bits 15:12
++ uint32_t addr_en : 1; // Bits 16:16
++ uint32_t seq_en : 1; // Bits 17:17
++ uint32_t pol_0 : 1; // Bits 18:18
++ uint32_t pol_1 : 1; // Bits 19:19
++ uint32_t cmd_a : 4; // Bits 23:20
++ uint32_t cmd_b : 4; // Bits 27:24
++ uint32_t cmd_c : 4; // Bits 31:28
++ };
++ uint32_t raw;
++};
++
++union ldat_sdat_reg {
++ struct __packed {
++ uint32_t bank_sel : 4; // Bits 3:0
++ uint32_t : 1; // Bits 4:4
++ uint32_t array_sel : 5; // Bits 9:5
++ uint32_t cmp : 1; // Bits 10:10
++ uint32_t replicate : 1; // Bits 11:11
++ uint32_t dword : 4; // Bits 15:12
++ uint32_t mode : 2; // Bits 17:16
++ uint32_t mpmap : 6; // Bits 23:18
++ uint32_t mpb_offset : 4; // Bits 27:24
++ uint32_t stage_en : 1; // Bits 28:28
++ uint32_t shadow : 2; // Bits 30:29
++ uint32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
+ union mcscheds_dft_misc_reg {
+ struct __packed {
+ uint32_t wdar : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/native_raminit/setup_wdb.c b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
+new file mode 100644
+index 0000000000..ec37c48415
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
+@@ -0,0 +1,159 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static void ldat_write_cacheline(
++ const struct sysinfo *const ctrl,
++ const uint8_t chunk,
++ const uint16_t start,
++ const uint64_t data)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ /*
++ * Do not do a 64-bit write here. The register is not aligned
++ * to a 64-bit boundary, which could potentially cause issues.
++ */
++ mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 0), data & UINT32_MAX);
++ mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 1), data >> 32);
++ /*
++ * Set REPLICATE = 0 as you don't want to replicate the data.
++ * Set BANK_SEL to the chunk you want to write the 64 bits to.
++ * Set ARRAY_SEL = 0 (the MC WDB) and MODE = 1.
++ */
++ const union ldat_sdat_reg ldat_sdat = {
++ .bank_sel = chunk,
++ .mode = 1,
++ };
++ mchbar_write32(QCLK_ch_LDAT_SDAT(channel), ldat_sdat.raw);
++ /*
++ * Finally, write the PDAT register indicating which cacheline
++ * of the WDB you want to write to by setting FAST_ADDR field
++ * to one of the 64 cache lines. Also set CMD_B in the PDAT
++ * register to 4'b1000, indicating that this is a LDAT write.
++ */
++ const union ldat_pdat_reg ldat_pdat = {
++ .fast_addr = MIN(start, 0xfff),
++ .cmd_b = 8,
++ };
++ mchbar_write32(QCLK_ch_LDAT_PDAT(channel), ldat_pdat.raw);
++ }
++}
++
++static void clear_ldat_mode(const struct sysinfo *const ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
++ mchbar_write32(QCLK_ch_LDAT_SDAT(channel), 0);
++}
++
++void write_wdb_fixed_pat(
++ const struct sysinfo *const ctrl,
++ const uint8_t patterns[],
++ const uint8_t pat_mask[],
++ const uint8_t spread,
++ const uint16_t start)
++{
++ for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
++ uint64_t data = 0;
++ for (uint8_t b = 0; b < 64; b++) {
++ const uint8_t beff = b % spread;
++ const uint8_t burst = patterns[pat_mask[beff]];
++ if (burst & BIT(chunk))
++ data |= 1ULL << b;
++ }
++ ldat_write_cacheline(ctrl, chunk, start, data);
++ }
++ clear_ldat_mode(ctrl);
++}
++
++static inline uint32_t rol_u32(const uint32_t val)
++{
++ return (val << 1) | ((val >> 31) & 1);
++}
++
++void write_wdb_va_pat(
++ const struct sysinfo *const ctrl,
++ const uint32_t agg_mask,
++ const uint32_t vic_mask,
++ const uint8_t vic_rot,
++ const uint16_t start)
++{
++ static const uint8_t va_mask_to_compressed[4] = {0xaa, 0xc0, 0xcc, 0xf0};
++ uint32_t v_mask = vic_mask;
++ uint32_t a_mask = agg_mask;
++ for (uint8_t v = 0; v < vic_rot; v++) {
++ uint8_t compressed[32] = {0};
++ /* Iterate through all 32 bits and create a compressed version of cacheline */
++ for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++) {
++ const uint8_t vic = !!(v_mask & BIT(b));
++ const uint8_t agg = !!(a_mask & BIT(b));
++ const uint8_t index = !vic << 1 | agg << 0;
++ compressed[b] = va_mask_to_compressed[index];
++ }
++ for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
++ uint32_t data = 0;
++ for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++)
++ data |= !!(compressed[b] & BIT(chunk)) << b;
++
++ const uint64_t data64 = (uint64_t)data << 32 | data;
++ ldat_write_cacheline(ctrl, chunk, start + v, data64);
++ }
++ v_mask = rol_u32(v_mask);
++ a_mask = rol_u32(a_mask);
++ }
++ clear_ldat_mode(ctrl);
++}
++
++void program_wdb_lfsr(const struct sysinfo *ctrl, const bool cleanup)
++{
++ /* Cleanup LFSR seeds are sequential */
++ const uint32_t cleanup_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xaaaaaa, 0xcccccc, 0xf0f0f0 };
++ const uint32_t regular_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xa10ca1, 0xef0d08, 0xad0a1e };
++ const uint32_t *seeds = cleanup ? cleanup_seeds : regular_seeds;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t i = 0; i < NUM_WDB_CL_MUX_SEEDS; i++) {
++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_RD_x(channel, i), seeds[i]);
++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_WR_x(channel, i), seeds[i]);
++ }
++ }
++}
++
++void setup_wdb(const struct sysinfo *ctrl)
++{
++ const uint32_t amask[9] = {
++ 0x86186186, 0x18618618, 0x30c30c30,
++ 0xa28a28a2, 0x8a28a28a, 0x14514514,
++ 0x28a28a28, 0x92492492, 0x24924924,
++ };
++ const uint32_t vmask = 0x41041041;
++
++ /* Fill first 8 entries with simple 2-LFSR VA pattern */
++ write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0);
++
++ /* Fill next 54 entries with 3-LFSR VA pattern */
++ for (uint8_t a = 0; a < ARRAY_SIZE(amask); a++)
++ write_wdb_va_pat(ctrl, amask[a], vmask, 6, 8 + a * 6);
++
++ program_wdb_lfsr(ctrl, false);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ const union reut_pat_cl_mux_lmn_reg wdb_cl_mux_lmn = {
++ .en_sweep_freq = 1,
++ .l_counter = 1,
++ .m_counter = 1,
++ .n_counter = 10,
++ };
++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_LMN(channel), wdb_cl_mux_lmn.raw);
++ }
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 4fc78a7f43..f8408e51a0 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -94,6 +94,11 @@
+ #define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
+ #define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+
++#define REUT_ch_PAT_WDB_CL_MUX_WR_x(ch, x) _MCMAIN_C_X(0x4048, ch, x) /* x in 0 .. 2 */
++#define REUT_ch_PAT_WDB_CL_MUX_RD_x(ch, x) _MCMAIN_C_X(0x4054, ch, x) /* x in 0 .. 2 */
++
++#define REUT_ch_PAT_WDB_CL_MUX_LMN(ch) _MCMAIN_C(0x4078, ch)
++
+ #define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
+
+ #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
+@@ -110,6 +115,10 @@
+ #define MC_INIT_STATE_ch(ch) _MCMAIN_C(0x42a0, ch)
+ #define TC_SRFTP_ch(ch) _MCMAIN_C(0x42a4, ch)
+
++#define QCLK_ch_LDAT_PDAT(ch) _MCMAIN_C(0x42d0, ch)
++#define QCLK_ch_LDAT_SDAT(ch) _MCMAIN_C(0x42d4, ch)
++#define QCLK_ch_LDAT_DATA_IN_x(ch, x) _MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */
++
+ #define REUT_GLOBAL_ERR 0x4804
+
+ #define REUT_ch_SEQ_CFG(ch) (0x48a8 + 8 * (ch))
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch b/config/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch
new file mode 100644
index 00000000..5df22ed3
--- /dev/null
+++ b/config/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch
@@ -0,0 +1,1128 @@
+From f4dd460d609276de7cb7db91f145a404451a2301 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 00:11:29 +0200
+Subject: [PATCH 18/26] haswell NRI: Add REUT I/O test library
+
+Implement a library to run I/O tests using the REUT hardware.
+
+Change-Id: Id7b207cd0a3989ddd23c88c6b1f0cfa79d2c861f
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_native.h | 110 +++
+ .../haswell/native_raminit/reg_structs.h | 121 +++
+ .../intel/haswell/native_raminit/testing_io.c | 742 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 30 +
+ 5 files changed, 1004 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/testing_io.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 8d7d4e4db0..6e1b365602 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -12,4 +12,5 @@ romstage-y += raminit_native.c
+ romstage-y += reut.c
+ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
++romstage-y += testing_io.c
+ romstage-y += timings_refresh.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index f29c2ec366..56df36ca8d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -58,6 +58,88 @@ enum {
+ REUT_MODE_NOP = 3, /* Normal operation mode */
+ };
+
++/* REUT error counter control */
++enum {
++ COUNT_ERRORS_PER_CHANNEL = 0,
++ COUNT_ERRORS_PER_LANE = 1,
++ COUNT_ERRORS_PER_BYTE_GROUP = 2,
++ COUNT_ERRORS_PER_CHUNK = 3,
++};
++
++enum wdb_dq_pattern {
++ BASIC_VA = 0,
++ SEGMENT_WDB,
++ CADB,
++ TURN_AROUND,
++ LMN_VA,
++ TURN_AROUND_WR,
++ TURN_AROUND_ODT,
++ RD_RD_TA,
++ RD_RD_TA_ALL,
++};
++
++enum reut_cmd_pat {
++ PAT_WR_RD,
++ PAT_WR,
++ PAT_RD,
++ PAT_RD_WR_TA,
++ PAT_WR_RD_TA,
++ PAT_ODT_TA,
++};
++
++/* REUT subsequence types (B = Base, O = Offset) */
++enum {
++ SUBSEQ_B_RD = 0 << 22,
++ SUBSEQ_B_WR = 1 << 22,
++ SUBSEQ_B_RD_WR = 2 << 22,
++ SUBSEQ_B_WR_RD = 3 << 22,
++ SUBSEQ_O_RD = 4 << 22,
++ SUBSEQ_O_WR = 5 << 22,
++};
++
++/* REUT mux control */
++enum {
++ REUT_MUX_LMN = 0,
++ REUT_MUX_BTBUFFER = 1,
++ REUT_MUX_LFSR = 2,
++};
++
++/* Increment scale */
++enum {
++ SCALE_LOGARITHM = 0,
++ SCALE_LINEAR = 1,
++};
++
++enum test_stop {
++ NSOE = 0, /* Never stop on error */
++ NTHSOE = 1, /* Stop on the nth error (we use n = 1) */
++ ABGSOE = 2, /* Stop on all byte groups error */
++ ALSOE = 3, /* Stop on all lanes error */
++};
++
++struct wdb_pat {
++ uint32_t start_ptr; /* Starting pointer in WDB */
++ uint32_t stop_ptr; /* Stopping pointer in WDB */
++ uint16_t inc_rate; /* How quickly the WDB walks through cachelines */
++ uint8_t dq_pattern; /* DQ pattern to use (see enum wdb_dq_pattern above) */
++};
++
++struct reut_pole {
++ uint16_t start;
++ uint16_t stop;
++ uint16_t order;
++ uint32_t inc_rate;
++ uint16_t inc_val;
++ bool wrap_trigger;
++};
++
++struct reut_box {
++ struct reut_pole rank;
++ struct reut_pole bank;
++ struct reut_pole row;
++ struct reut_pole col;
++};
++
+ enum command_training_iteration {
+ CT_ITERATION_CLOCK = 0,
+ CT_ITERATION_CMD_NORTH,
+@@ -199,6 +281,10 @@ struct sysinfo {
+ uint16_t mr1[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint16_t mr2[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint16_t mr3[NUM_CHANNELS][NUM_SLOTRANKS];
++
++ uint8_t dq_pat;
++
++ uint8_t dq_pat_lc;
+ };
+
+ static inline bool is_hsw_ult(void)
+@@ -342,6 +428,30 @@ void write_wdb_va_pat(
+ void program_wdb_lfsr(const struct sysinfo *ctrl, bool cleanup);
+ void setup_wdb(const struct sysinfo *ctrl);
+
++void program_seq_addr(uint8_t channel, const struct reut_box *reut_addr, bool log_seq_addr);
++void program_loop_count(const struct sysinfo *ctrl, uint8_t channel, uint8_t lc_exp);
++
++void setup_io_test(
++ struct sysinfo *ctrl,
++ uint8_t chanmask,
++ enum reut_cmd_pat cmd_pat,
++ uint16_t num_cl,
++ uint8_t lc,
++ const struct reut_box *reut_addr,
++ enum test_stop soe,
++ const struct wdb_pat *pat,
++ uint8_t en_cadb,
++ uint8_t subseq_wait);
++
++void setup_io_test_cadb(struct sysinfo *ctrl, uint8_t chanmask, uint8_t lc, enum test_stop soe);
++void setup_io_test_basic_va(struct sysinfo *ctrl, uint8_t chm, uint8_t lc, enum test_stop soe);
++void setup_io_test_mpr(struct sysinfo *ctrl, uint8_t chanmask, uint8_t lc, enum test_stop soe);
++
++uint8_t select_reut_ranks(struct sysinfo *ctrl, uint8_t channel, uint8_t rankmask);
++
++void run_mpr_io_test(bool clear_errors);
++uint8_t run_io_test(struct sysinfo *ctrl, uint8_t chanmask, uint8_t dq_pat, bool clear_errors);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index 7aa8d8c8b2..b943259b91 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -347,6 +347,54 @@ union reut_pat_cl_mux_lmn_reg {
+ uint32_t raw;
+ };
+
++union reut_err_ctl_reg {
++ struct __packed {
++ uint32_t stop_on_nth_error : 6; // Bits 5:0
++ uint32_t : 6; // Bits 11:6
++ uint32_t stop_on_error_control : 2; // Bits 13:12
++ uint32_t : 2; // Bits 15:14
++ uint32_t selective_err_enable_chunk : 8; // Bits 23:16
++ uint32_t selective_err_enable_cacheline : 8; // Bits 31:24
++ };
++ uint32_t raw;
++};
++
++union reut_pat_cadb_mux_ctrl_reg {
++ struct __packed {
++ uint32_t mux_0_ctrl : 2; // Bits 1:0
++ uint32_t : 2; // Bits 3:2
++ uint32_t mux_1_ctrl : 2; // Bits 5:4
++ uint32_t : 2; // Bits 7:6
++ uint32_t mux_2_ctrl : 2; // Bits 9:8
++ uint32_t : 6; // Bits 15:10
++ uint32_t sel_mux_0_ctrl : 2; // Bits 17:16
++ uint32_t : 2; // Bits 19:18
++ uint32_t sel_mux_1_ctrl : 2; // Bits 21:20
++ uint32_t : 2; // Bits 23:22
++ uint32_t sel_mux_2_ctrl : 2; // Bits 25:24
++ uint32_t : 6; // Bits 31:26
++ };
++ uint32_t raw;
++};
++
++union reut_pat_wdb_cl_mux_cfg_reg {
++ struct __packed {
++ uint32_t mux_0_control : 2; // Bits 1:0
++ uint32_t : 1; // Bits 2:2
++ uint32_t mux_1_control : 2; // Bits 4:3
++ uint32_t : 1; // Bits 5:5
++ uint32_t mux_2_control : 2; // Bits 7:6
++ uint32_t : 6; // Bits 13:8
++ uint32_t ecc_replace_byte_ctl : 1; // Bits 14:14
++ uint32_t ecc_data_source_sel : 1; // Bits 15:15
++ uint32_t save_lfsr_seed_rate : 6; // Bits 21:16
++ uint32_t : 2; // Bits 23:22
++ uint32_t reload_lfsr_seed_rate : 3; // Bits 26:24
++ uint32_t : 5; // Bits 31:27
++ };
++ uint32_t raw;
++};
++
+ union reut_pat_cadb_prog_reg {
+ struct __packed {
+ uint32_t addr : 16; // Bits 15:0
+@@ -366,6 +414,19 @@ union reut_pat_cadb_prog_reg {
+ uint32_t raw32[2];
+ };
+
++union reut_pat_wdb_cl_ctrl_reg {
++ struct __packed {
++ uint32_t inc_rate : 5; // Bits 4:0
++ uint32_t inc_scale : 1; // Bits 5:5
++ uint32_t : 2; // Bits 7:6
++ uint32_t start_ptr : 6; // Bits 13:8
++ uint32_t : 2; // Bits 15:14
++ uint32_t end_ptr : 6; // Bits 21:16
++ uint32_t : 10; // Bits 31:22
++ };
++ uint32_t raw;
++};
++
+ union reut_pat_cadb_mrs_reg {
+ struct __packed {
+ uint32_t delay_gap : 3; // Bits 2:0
+@@ -406,6 +467,66 @@ union reut_seq_cfg_reg {
+ uint32_t raw32[2];
+ };
+
++union reut_seq_base_addr_reg {
++ struct __packed {
++ uint32_t : 3; // Bits 2:0
++ uint32_t col_addr : 8; // Bits 10:3
++ uint32_t : 13; // Bits 23:11
++ uint32_t row_addr : 16; // Bits 39:24
++ uint32_t : 8; // Bits 47:40
++ uint32_t bank_addr : 3; // Bits 50:48
++ uint32_t : 5; // Bits 55:51
++ uint32_t rank_addr : 3; // Bits 58:56
++ uint32_t : 5; // Bits 63:59
++ };
++ uint32_t raw32[2];
++ uint64_t raw;
++};
++
++union reut_seq_misc_ctl_reg {
++ struct __packed {
++ uint32_t col_addr_order : 2; // Bits 1:0
++ uint32_t row_addr_order : 2; // Bits 3:2
++ uint32_t bank_addr_order : 2; // Bits 5:4
++ uint32_t rank_addr_order : 2; // Bits 7:6
++ uint32_t : 5; // Bits 12:8
++ uint32_t addr_invert_rate : 3; // Bits 15:13
++ uint32_t : 4; // Bits 19:16
++ uint32_t col_addr_invert_en : 1; // Bits 20:20
++ uint32_t row_addr_invert_en : 1; // Bits 21:21
++ uint32_t bank_addr_invert_en : 1; // Bits 22:22
++ uint32_t rank_addr_invert_en : 1; // Bits 23:23
++ uint32_t col_wrap_trigger_en : 1; // Bits 24:24
++ uint32_t row_wrap_trigger_en : 1; // Bits 25:25
++ uint32_t bank_wrap_trigger_en : 1; // Bits 26:26
++ uint32_t rank_wrap_trigger_en : 1; // Bits 27:27
++ uint32_t col_wrap_carry_en : 1; // Bits 28:28
++ uint32_t row_wrap_carry_en : 1; // Bits 29:29
++ uint32_t bank_wrap_carry_en : 1; // Bits 30:30
++ uint32_t rank_wrap_carry_en : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
++union reut_seq_addr_inc_ctl_reg {
++ struct __packed {
++ uint32_t : 3; // Bits 2:0
++ uint32_t col_addr_increment : 8; // Bits 10:3
++ uint32_t : 1; // Bits 11:11
++ uint32_t col_addr_update : 8; // Bits 19:12
++ uint32_t row_addr_increment : 12; // Bits 31:20
++ uint32_t row_addr_update : 6; // Bits 37:32
++ uint32_t bank_addr_increment : 3; // Bits 40:38
++ uint32_t : 3; // Bits 43:41
++ uint32_t bank_addr_update : 8; // Bits 53:44
++ uint32_t rank_addr_increment : 3; // Bits 54:52
++ uint32_t : 1; // Bits 55:55
++ uint32_t rank_addr_update : 8; // Bits 63:56
++ };
++ uint64_t raw;
++ uint32_t raw32[2];
++};
++
+ union reut_seq_ctl_reg {
+ struct __packed {
+ uint32_t start_test : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/native_raminit/testing_io.c b/src/northbridge/intel/haswell/native_raminit/testing_io.c
+new file mode 100644
+index 0000000000..7716fc4285
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/testing_io.c
+@@ -0,0 +1,742 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <lib.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static void set_cadb_patterns(const uint8_t channel, const uint16_t seeds[NUM_CADB_MUX_SEEDS])
++{
++ for (uint8_t i = 0; i < NUM_CADB_MUX_SEEDS; i++)
++ mchbar_write32(REUT_ch_PAT_CADB_MUX_x(channel, i), seeds[i]);
++}
++
++static void setup_cadb(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t vic_spread,
++ const uint8_t vic_bit)
++{
++ const bool lmn_en = false;
++
++ /*
++ * Currently, always start writing at CADB row 0.
++ * Could add a start point parameter in the future.
++ */
++ mchbar_write8(REUT_ch_PAT_CADB_WRITE_PTR(channel), 0);
++ const uint8_t num_cadb_rows = 8;
++ for (uint8_t row = 0; row < num_cadb_rows; row++) {
++ const uint8_t lfsr0 = (row >> 0) & 1;
++ const uint8_t lfsr1 = (row >> 1) & 1;
++ uint64_t reg64 = 0;
++ for (uint8_t bit = 0; bit < 22; bit++) {
++ uint8_t bremap;
++ if (bit >= 19) {
++ /* (bremap in 40 .. 42) => CADB data control */
++ bremap = bit + 21;
++ } else if (bit >= 16) {
++ /* (bremap in 24 .. 26) => CADB data bank */
++ bremap = bit + 8;
++ } else {
++ /* (bremap in 0 .. 15) => CADB data address */
++ bremap = bit;
++ }
++ const uint8_t fine = bit % vic_spread;
++ reg64 |= ((uint64_t)(fine == vic_bit ? lfsr0 : lfsr1)) << bremap;
++ }
++ /*
++ * Write row. CADB pointer is auto incremented after every write. This must be
++ * a single 64-bit write, otherwise the CADB pointer will auto-increment twice.
++ */
++ mchbar_write64(REUT_ch_PAT_CADB_PROG(channel), reg64);
++ }
++ const union reut_pat_cadb_mux_ctrl_reg cadb_mux_ctrl = {
++ .mux_0_ctrl = lmn_en ? REUT_MUX_LMN : REUT_MUX_LFSR,
++ .mux_1_ctrl = REUT_MUX_LFSR,
++ .mux_2_ctrl = REUT_MUX_LFSR,
++ };
++ mchbar_write32(REUT_ch_PAT_CADB_MUX_CTRL(channel), cadb_mux_ctrl.raw);
++ const union reut_pat_cl_mux_lmn_reg cadb_cl_mux_lmn = {
++ .en_sweep_freq = 1,
++ .l_counter = 1,
++ .m_counter = 1,
++ .n_counter = 6,
++ };
++ mchbar_write32(REUT_ch_PAT_CADB_CL_MUX_LMN(channel), cadb_cl_mux_lmn.raw);
++ const uint16_t cadb_mux_seeds[NUM_CADB_MUX_SEEDS] = { 0x0ea1, 0xbeef, 0xdead };
++ set_cadb_patterns(channel, cadb_mux_seeds);
++}
++
++static uint32_t calc_rate(const uint32_t rate, const uint32_t lim, const uint8_t scale_bit)
++{
++ return rate > lim ? log2_ceil(rate - 1) : BIT(scale_bit) | rate;
++}
++
++void program_seq_addr(
++ const uint8_t channel,
++ const struct reut_box *reut_addr,
++ const bool log_seq_addr)
++{
++ const int loglevel = log_seq_addr ? BIOS_ERR : BIOS_NEVER;
++ const uint32_t div = 8;
++ union reut_seq_base_addr_reg reut_seq_addr_start = {
++ .col_addr = reut_addr->col.start / div,
++ .row_addr = reut_addr->row.start,
++ .bank_addr = reut_addr->bank.start,
++ .rank_addr = reut_addr->rank.start,
++ };
++ mchbar_write64(REUT_ch_SEQ_ADDR_START(channel), reut_seq_addr_start.raw);
++ reut_seq_addr_start.raw = mchbar_read64(REUT_ch_SEQ_ADDR_START(channel));
++ printk(loglevel, "\tStart column: %u\n", reut_seq_addr_start.col_addr);
++ printk(loglevel, "\tStart row: %u\n", reut_seq_addr_start.row_addr);
++ printk(loglevel, "\tStart bank: %u\n", reut_seq_addr_start.bank_addr);
++ printk(loglevel, "\tStart rank: %u\n", reut_seq_addr_start.rank_addr);
++ printk(loglevel, "\n");
++
++ union reut_seq_base_addr_reg reut_seq_addr_stop = {
++ .col_addr = reut_addr->col.stop / div,
++ .row_addr = reut_addr->row.stop,
++ .bank_addr = reut_addr->bank.stop,
++ .rank_addr = reut_addr->rank.stop,
++ };
++ mchbar_write64(REUT_ch_SEQ_ADDR_WRAP(channel), reut_seq_addr_stop.raw);
++ reut_seq_addr_stop.raw = mchbar_read64(REUT_ch_SEQ_ADDR_WRAP(channel));
++ printk(loglevel, "\tStop column: %u\n", reut_seq_addr_stop.col_addr);
++ printk(loglevel, "\tStop row: %u\n", reut_seq_addr_stop.row_addr);
++ printk(loglevel, "\tStop bank: %u\n", reut_seq_addr_stop.bank_addr);
++ printk(loglevel, "\tStop rank: %u\n", reut_seq_addr_stop.rank_addr);
++ printk(loglevel, "\n");
++
++ union reut_seq_misc_ctl_reg reut_seq_misc_ctl = {
++ .col_wrap_trigger_en = reut_addr->col.wrap_trigger,
++ .row_wrap_trigger_en = reut_addr->row.wrap_trigger,
++ .bank_wrap_trigger_en = reut_addr->bank.wrap_trigger,
++ .rank_wrap_trigger_en = reut_addr->rank.wrap_trigger,
++ };
++ mchbar_write32(REUT_ch_SEQ_MISC_CTL(channel), reut_seq_misc_ctl.raw);
++ printk(loglevel, "\tWrap column: %u\n", reut_addr->col.wrap_trigger);
++ printk(loglevel, "\tWrap row: %u\n", reut_addr->row.wrap_trigger);
++ printk(loglevel, "\tWrap bank: %u\n", reut_addr->bank.wrap_trigger);
++ printk(loglevel, "\tWrap rank: %u\n", reut_addr->rank.wrap_trigger);
++ printk(loglevel, "\n");
++
++ union reut_seq_addr_inc_ctl_reg reut_seq_addr_inc_ctl = {
++ .col_addr_update = calc_rate(reut_addr->col.inc_rate, 31, 7),
++ .row_addr_update = calc_rate(reut_addr->row.inc_rate, 15, 5),
++ .bank_addr_update = calc_rate(reut_addr->bank.inc_rate, 31, 7),
++ .rank_addr_update = calc_rate(reut_addr->rank.inc_rate, 31, 7),
++ .col_addr_increment = reut_addr->col.inc_val,
++ .row_addr_increment = reut_addr->row.inc_val,
++ .bank_addr_increment = reut_addr->bank.inc_val,
++ .rank_addr_increment = reut_addr->rank.inc_val,
++ };
++ printk(loglevel, "\tUpdRate column: %u\n", reut_addr->col.inc_rate);
++ printk(loglevel, "\tUpdRate row: %u\n", reut_addr->row.inc_rate);
++ printk(loglevel, "\tUpdRate bank: %u\n", reut_addr->bank.inc_rate);
++ printk(loglevel, "\tUpdRate rank: %u\n", reut_addr->rank.inc_rate);
++ printk(loglevel, "\n");
++ printk(loglevel, "\tUpdRateCR column: %u\n", reut_seq_addr_inc_ctl.col_addr_update);
++ printk(loglevel, "\tUpdRateCR row: %u\n", reut_seq_addr_inc_ctl.row_addr_update);
++ printk(loglevel, "\tUpdRateCR bank: %u\n", reut_seq_addr_inc_ctl.bank_addr_update);
++ printk(loglevel, "\tUpdRateCR rank: %u\n", reut_seq_addr_inc_ctl.rank_addr_update);
++ printk(loglevel, "\n");
++ printk(loglevel, "\tUpdInc column: %u\n", reut_seq_addr_inc_ctl.col_addr_increment);
++ printk(loglevel, "\tUpdInc row: %u\n", reut_seq_addr_inc_ctl.row_addr_increment);
++ printk(loglevel, "\tUpdInc bank: %u\n", reut_seq_addr_inc_ctl.bank_addr_increment);
++ printk(loglevel, "\tUpdInc rank: %u\n", reut_seq_addr_inc_ctl.rank_addr_increment);
++ printk(loglevel, "\n");
++ mchbar_write64(REUT_ch_SEQ_ADDR_INC_CTL(channel), reut_seq_addr_inc_ctl.raw);
++}
++
++/*
++ * Early steppings take exponential (base 2) loopcount values,
++ * but later steppings take linear loopcount values elsewhere.
++ * Address the differences in register offset and format here.
++ */
++void program_loop_count(const struct sysinfo *ctrl, const uint8_t channel, const uint8_t lc_exp)
++{
++ if (ctrl->stepping >= STEPPING_C0) {
++ const uint32_t loopcount = lc_exp >= 32 ? 0 : BIT(lc_exp);
++ mchbar_write32(HSW_REUT_ch_SEQ_LOOP_COUNT(channel), loopcount);
++ } else {
++ const uint8_t loopcount = lc_exp >= 32 ? 0 : lc_exp + 1;
++ union reut_seq_cfg_reg reut_seq_cfg = {
++ .raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)),
++ };
++ reut_seq_cfg.early_steppings_loop_count = loopcount;
++ mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ }
++}
++
++static inline void write_subseq(const uint8_t channel, const uint8_t idx, const uint32_t ssq)
++{
++ mchbar_write32(REUT_ch_SUBSEQ_x_CTL(channel, idx), ssq);
++}
++
++static void program_subseq(
++ struct sysinfo *const ctrl,
++ const uint8_t channel,
++ const enum reut_cmd_pat cmd_pat,
++ const uint32_t ss_a,
++ const uint32_t ss_b)
++{
++ switch (cmd_pat) {
++ case PAT_WR_RD_TA:
++ write_subseq(channel, 0, ss_a | SUBSEQ_B_WR);
++ for (uint8_t i = 1; i < 7; i++)
++ write_subseq(channel, i, ss_b | SUBSEQ_B_RD_WR);
++
++ write_subseq(channel, 7, ss_a | SUBSEQ_B_RD);
++ break;
++ case PAT_RD_WR_TA:
++ write_subseq(channel, 0, ss_b | SUBSEQ_B_WR_RD);
++ break;
++ case PAT_ODT_TA:
++ write_subseq(channel, 0, ss_a | SUBSEQ_B_WR);
++ write_subseq(channel, 1, ss_b | SUBSEQ_B_RD_WR);
++ write_subseq(channel, 2, ss_a | SUBSEQ_B_RD);
++ write_subseq(channel, 3, ss_b | SUBSEQ_B_WR_RD);
++ break;
++ default:
++ write_subseq(channel, 0, ss_a | SUBSEQ_B_WR);
++ write_subseq(channel, 1, ss_a | SUBSEQ_B_RD);
++ break;
++ }
++}
++
++void setup_io_test(
++ struct sysinfo *ctrl,
++ const uint8_t chanmask,
++ const enum reut_cmd_pat cmd_pat,
++ const uint16_t num_cl,
++ const uint8_t lc,
++ const struct reut_box *const reut_addr,
++ const enum test_stop soe,
++ const struct wdb_pat *const pat,
++ const uint8_t en_cadb,
++ const uint8_t subseq_wait)
++{
++ if (!chanmask)
++ die("\n%s: invalid chanmask\n", __func__, chanmask);
++
++ /*
++ * Prepare variables needed for both channels.
++ * Check for the cases where this MUST be 1: when
++ * we manually walk through subseq ODT and TA Wr.
++ */
++ uint8_t lc_exp = MAX(lc - log2_ceil(num_cl), 0);
++ if (cmd_pat == PAT_WR_RD_TA || cmd_pat == PAT_ODT_TA)
++ lc_exp = 0;
++
++ uint8_t num_clcr;
++ if (num_cl > 127) {
++ /* Assume exponential number */
++ num_clcr = log2_ceil(num_cl);
++ } else {
++ /* Set number of cache lines as linear number */
++ num_clcr = num_cl | BIT(7);
++ }
++
++ const uint16_t num_cl2 = 2 * num_cl;
++ uint8_t num_cl2cr;
++ if (num_cl2 > 127) {
++ /* Assume exponential number */
++ num_cl2cr = log2_ceil(num_cl2);
++ } else {
++ /* Set number of cache lines as linear number */
++ num_cl2cr = num_cl2 | BIT(7);
++ }
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!(chanmask & BIT(channel))) {
++ union reut_seq_cfg_reg reut_seq_cfg = {
++ .raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)),
++ };
++ reut_seq_cfg.global_control = 0;
++ mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ continue;
++ }
++
++ /*
++ * Program CADB
++ */
++ mchbar_write8(REUT_ch_MISC_PAT_CADB_CTRL(channel), !!en_cadb);
++ if (en_cadb)
++ setup_cadb(ctrl, channel, 7, 8);
++
++ /*
++ * Program sequence
++ */
++ uint8_t subseq_start = 0;
++ uint8_t subseq_end = 0;
++ switch (cmd_pat) {
++ case PAT_WR_RD:
++ subseq_end = 1;
++ break;
++ case PAT_WR:
++ break;
++ case PAT_RD:
++ subseq_start = 1;
++ subseq_end = 1;
++ break;
++ case PAT_RD_WR_TA:
++ break;
++ case PAT_WR_RD_TA:
++ subseq_end = 7;
++ break;
++ case PAT_ODT_TA:
++ subseq_end = 3;
++ break;
++ default:
++ die("\n%s: Pattern type %u is invalid\n", __func__, cmd_pat);
++ }
++ const union reut_seq_cfg_reg reut_seq_cfg = {
++ .global_control = 1,
++ .initialization_mode = REUT_MODE_TEST,
++ .subsequence_start_pointer = subseq_start,
++ .subsequence_end_pointer = subseq_end,
++ .start_test_delay = 2,
++ };
++ mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ program_loop_count(ctrl, channel, lc_exp);
++ mchbar_write32(REUT_ch_SEQ_CTL(channel), (union reut_seq_ctl_reg) {
++ .clear_errors = 1,
++ }.raw);
++
++ /*
++ * Program subsequences
++ */
++ uint32_t subseq_a = 0;
++
++ /* Number of cachelines and scale */
++ subseq_a |= (num_clcr & 0x00ff) << 0;
++ subseq_a |= (subseq_wait & 0x3fff) << 8;
++
++ /* Reset current base address to start */
++ subseq_a |= BIT(27);
++
++ uint32_t subseq_b = 0;
++
++ /* Number of cachelines and scale */
++ subseq_b |= (num_cl2cr & 0x00ff) << 0;
++ subseq_b |= (subseq_wait & 0x3fff) << 8;
++
++ /* Reset current base address to start */
++ subseq_b |= BIT(27);
++
++ program_subseq(ctrl, channel, cmd_pat, subseq_a, subseq_b);
++
++ /* Program sequence address */
++ program_seq_addr(channel, reut_addr, false);
++
++ /* Program WDB */
++ const bool is_linear = pat->inc_rate < 32;
++ mchbar_write32(REUT_ch_WDB_CL_CTRL(channel), (union reut_pat_wdb_cl_ctrl_reg) {
++ .start_ptr = pat->start_ptr,
++ .end_ptr = pat->stop_ptr,
++ .inc_rate = is_linear ? pat->inc_rate : log2_ceil(pat->inc_rate),
++ .inc_scale = is_linear,
++ }.raw);
++
++ /* Enable LMN in LMN or CADB modes, used to create lots of supply noise */
++ const bool use_lmn = pat->dq_pattern == LMN_VA || pat->dq_pattern == CADB;
++ union reut_pat_wdb_cl_mux_cfg_reg pat_wdb_cl_mux_cfg = {
++ .mux_0_control = use_lmn ? REUT_MUX_LMN : REUT_MUX_LFSR,
++ .mux_1_control = REUT_MUX_LFSR,
++ .mux_2_control = REUT_MUX_LFSR,
++ .ecc_data_source_sel = 1,
++ };
++
++ /* Program LFSR save/restore, too complex unless everything is power of 2 */
++ if (cmd_pat == PAT_ODT_TA || cmd_pat == PAT_WR_RD_TA) {
++ pat_wdb_cl_mux_cfg.reload_lfsr_seed_rate = log2_ceil(num_cl) + 1;
++ pat_wdb_cl_mux_cfg.save_lfsr_seed_rate = 1;
++ }
++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_CFG(channel), pat_wdb_cl_mux_cfg.raw);
++
++ /* Inversion mask is not used */
++ mchbar_write32(REUT_ch_PAT_WDB_INV(channel), 0);
++
++ /* Program error checking */
++ const union reut_err_ctl_reg reut_err_ctl = {
++ .selective_err_enable_cacheline = 0xff,
++ .selective_err_enable_chunk = 0xff,
++ .stop_on_error_control = soe,
++ .stop_on_nth_error = 1,
++ };
++ mchbar_write32(REUT_ch_ERR_CONTROL(channel), reut_err_ctl.raw);
++ mchbar_write64(REUT_ch_ERR_DATA_MASK(channel), 0);
++ mchbar_write8(REUT_ch_ERR_ECC_MASK(channel), 0);
++ }
++
++ /* Always do a ZQ short before the beginning of a test */
++ reut_issue_zq(ctrl, chanmask, ZQ_SHORT);
++}
++
++void setup_io_test_cadb(
++ struct sysinfo *ctrl,
++ const uint8_t chanmask,
++ const uint8_t lc,
++ const enum test_stop soe)
++{
++ const struct reut_box reut_addr = {
++ .rank = {
++ .start = 0,
++ .stop = 0,
++ .inc_rate = 32,
++ .inc_val = 1,
++ },
++ .bank = {
++ .start = 0,
++ .stop = 7,
++ .inc_rate = 3,
++ .inc_val = 1,
++ },
++ .row = {
++ .start = 0,
++ .stop = 2047,
++ .inc_rate = 3,
++ .inc_val = 73,
++ },
++ .col = {
++ .start = 0,
++ .stop = 1023,
++ .inc_rate = 0,
++ .inc_val = 53,
++ },
++ };
++ const struct wdb_pat pattern = {
++ .start_ptr = 0,
++ .stop_ptr = 9,
++ .inc_rate = 4,
++ .dq_pattern = CADB,
++ };
++ setup_io_test(
++ ctrl,
++ chanmask,
++ PAT_WR_RD,
++ 128,
++ lc,
++ &reut_addr,
++ soe,
++ &pattern,
++ 1,
++ 0);
++
++ ctrl->dq_pat_lc = MAX(lc - 2 - 3, 0) + 1;
++ ctrl->dq_pat = CADB;
++}
++
++void setup_io_test_basic_va(
++ struct sysinfo *ctrl,
++ const uint8_t chanmask,
++ const uint8_t lc,
++ const enum test_stop soe)
++{
++ const uint32_t spread = 8;
++ const struct reut_box reut_addr = {
++ .rank = {
++ .start = 0,
++ .stop = 0,
++ .inc_rate = 32,
++ .inc_val = 1,
++ },
++ .col = {
++ .start = 0,
++ .stop = 1023,
++ .inc_rate = 0,
++ .inc_val = 1,
++ },
++ };
++ const struct wdb_pat pattern = {
++ .start_ptr = 0,
++ .stop_ptr = spread - 1,
++ .inc_rate = 4,
++ .dq_pattern = BASIC_VA,
++ };
++ setup_io_test(
++ ctrl,
++ chanmask,
++ PAT_WR_RD,
++ 128,
++ lc,
++ &reut_addr,
++ soe,
++ &pattern,
++ 0,
++ 0);
++
++ ctrl->dq_pat_lc = MAX(lc - 8, 0) + 1;
++ ctrl->dq_pat = BASIC_VA;
++}
++
++void setup_io_test_mpr(
++ struct sysinfo *ctrl,
++ const uint8_t chanmask,
++ const uint8_t lc,
++ const enum test_stop soe)
++{
++ const struct reut_box reut_addr_ddr = {
++ .rank = {
++ .start = 0,
++ .stop = 0,
++ .inc_rate = 32,
++ .inc_val = 1,
++ },
++ .col = {
++ .start = 0,
++ .stop = 1023,
++ .inc_rate = 0,
++ .inc_val = 1,
++ },
++ };
++ const struct reut_box reut_addr_lpddr = {
++ .bank = {
++ .start = 4,
++ .stop = 4,
++ .inc_rate = 0,
++ .inc_val = 0,
++ },
++ };
++ const struct wdb_pat pattern = {
++ .start_ptr = 0,
++ .stop_ptr = 9,
++ .inc_rate = 4,
++ .dq_pattern = BASIC_VA,
++ };
++ setup_io_test(
++ ctrl,
++ chanmask,
++ PAT_RD,
++ 128,
++ lc,
++ ctrl->lpddr ? &reut_addr_lpddr : &reut_addr_ddr,
++ soe,
++ &pattern,
++ 0,
++ 0);
++
++ ctrl->dq_pat_lc = 1;
++ ctrl->dq_pat = BASIC_VA;
++}
++
++uint8_t select_reut_ranks(struct sysinfo *ctrl, const uint8_t channel, uint8_t rankmask)
++{
++ rankmask &= ctrl->rankmap[channel];
++
++ uint8_t rank_count = 0;
++ uint32_t rank_log_to_phys = 0;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_mask(rank, rankmask))
++ continue;
++
++ rank_log_to_phys |= rank << (4 * rank_count);
++ rank_count++;
++ }
++ mchbar_write32(REUT_ch_RANK_LOG_TO_PHYS(channel), rank_log_to_phys);
++
++ union reut_seq_cfg_reg reut_seq_cfg = {
++ .raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)),
++ };
++ if (!rank_count) {
++ reut_seq_cfg.global_control = 0;
++ mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ return 0;
++ }
++ union reut_seq_base_addr_reg reut_seq_addr_stop = {
++ .raw = mchbar_read64(REUT_ch_SEQ_ADDR_WRAP(channel)),
++ };
++ reut_seq_addr_stop.rank_addr = rank_count - 1;
++ mchbar_write64(REUT_ch_SEQ_ADDR_WRAP(channel), reut_seq_addr_stop.raw);
++
++ reut_seq_cfg.global_control = 1;
++ mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ return BIT(channel);
++}
++
++void run_mpr_io_test(const bool clear_errors)
++{
++ io_reset();
++ mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) {
++ .start_test = 1,
++ .clear_errors = clear_errors,
++ }.raw);
++ tick_delay(2);
++ io_reset();
++ tick_delay(2);
++ mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) {
++ .stop_test = 1,
++ }.raw);
++}
++
++static uint8_t get_num_tests(const uint8_t dq_pat)
++{
++ switch (dq_pat) {
++ case SEGMENT_WDB: return 4;
++ case CADB: return 7;
++ case TURN_AROUND_WR: return 8;
++ case TURN_AROUND_ODT: return 4;
++ case RD_RD_TA: return 2;
++ case RD_RD_TA_ALL: return 8;
++ default: return 1;
++ }
++}
++
++uint8_t run_io_test(
++ struct sysinfo *const ctrl,
++ const uint8_t chanmask,
++ const uint8_t dq_pat,
++ const bool clear_errors)
++{
++ /* SEGMENT_WDB only runs 4 tests */
++ const uint8_t segment_wdb_lc[4] = { 0, 0, 4, 2 };
++ const union reut_pat_wdb_cl_ctrl_reg pat_wdb_cl[4] = {
++ [0] = {
++ .start_ptr = 0,
++ .end_ptr = 9,
++ .inc_rate = 25,
++ .inc_scale = SCALE_LINEAR,
++ },
++ [1] = {
++ .start_ptr = 0,
++ .end_ptr = 9,
++ .inc_rate = 25,
++ .inc_scale = SCALE_LINEAR,
++ },
++ [2] = {
++ .start_ptr = 10,
++ .end_ptr = 63,
++ .inc_rate = 19,
++ .inc_scale = SCALE_LINEAR,
++ },
++ [3] = {
++ .start_ptr = 10,
++ .end_ptr = 63,
++ .inc_rate = 10,
++ .inc_scale = SCALE_LINEAR,
++ },
++ };
++ const bool is_turnaround = dq_pat == RD_RD_TA || dq_pat == RD_RD_TA_ALL;
++ const uint8_t num_tests = get_num_tests(dq_pat);
++ union tc_bank_rank_a_reg tc_bank_rank_a[NUM_CHANNELS] = { 0 };
++ if (is_turnaround) {
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!(chanmask & BIT(channel)))
++ continue;
++
++ tc_bank_rank_a[channel].raw = ctrl->tc_bankrank_a[channel].raw;
++ }
++ }
++ for (uint8_t t = 0; t < num_tests; t++) {
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!(chanmask & BIT(channel)))
++ continue;
++
++ if (dq_pat == SEGMENT_WDB) {
++ mchbar_write32(REUT_ch_WDB_CL_CTRL(channel), pat_wdb_cl[t].raw);
++ /*
++ * Skip programming LFSR save/restore. Too complex
++ * unless power of 2. Program desired loopcount.
++ */
++ const uint8_t pat_lc = ctrl->dq_pat_lc + segment_wdb_lc[t];
++ program_loop_count(ctrl, channel, pat_lc);
++ } else if (dq_pat == CADB) {
++ setup_cadb(ctrl, channel, num_tests, t);
++ } else if (dq_pat == TURN_AROUND_WR || dq_pat == TURN_AROUND_ODT) {
++ union reut_seq_cfg_reg reut_seq_cfg = {
++ .raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)),
++ };
++ reut_seq_cfg.subsequence_start_pointer = t;
++ reut_seq_cfg.subsequence_end_pointer = t;
++ mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw);
++ union reut_seq_addr_inc_ctl_reg addr_inc_ctl = {
++ .raw = mchbar_read64(REUT_ch_SEQ_ADDR_INC_CTL(channel)),
++ };
++ uint8_t ta_inc_rate = 1;
++ if (dq_pat == TURN_AROUND_WR && (t == 0 || t == 7))
++ ta_inc_rate = 0;
++ else if (dq_pat == TURN_AROUND_ODT && (t == 0 || t == 2))
++ ta_inc_rate = 0;
++
++ /* Program increment rate as linear value */
++ addr_inc_ctl.rank_addr_update = BIT(7) | ta_inc_rate;
++ addr_inc_ctl.col_addr_update = BIT(7) | ta_inc_rate;
++ mchbar_write64(REUT_ch_SEQ_ADDR_INC_CTL(channel),
++ addr_inc_ctl.raw);
++ } else if (dq_pat == RD_RD_TA) {
++ tc_bank_rank_a[channel].tRDRD_sr = (t == 0) ? 4 : 5;
++ mchbar_write32(TC_BANK_RANK_A_ch(channel),
++ tc_bank_rank_a[channel].raw);
++ } else if (dq_pat == RD_RD_TA_ALL) {
++ /*
++ * Program tRDRD for SR and DR. Run 8 tests, covering
++ * tRDRD_sr = 4, 5, 6, 7 and tRDRD_dr = min, +1, +2, +3
++ */
++ const uint32_t tRDRD_dr = ctrl->tc_bankrank_a[channel].tRDRD_dr;
++ tc_bank_rank_a[channel].tRDRD_sr = (t % 4) + 4;
++ tc_bank_rank_a[channel].tRDRD_dr = (t % 4) + tRDRD_dr;
++ mchbar_write32(TC_BANK_RANK_A_ch(channel),
++ tc_bank_rank_a[channel].raw);
++
++ /* Program linear rank increment rate */
++ union reut_seq_addr_inc_ctl_reg addr_inc_ctl = {
++ .raw = mchbar_read64(REUT_ch_SEQ_ADDR_INC_CTL(channel)),
++ };
++ addr_inc_ctl.rank_addr_update = BIT(7) | (t / 4) ? 0 : 31;
++ mchbar_write64(REUT_ch_SEQ_ADDR_INC_CTL(channel),
++ addr_inc_ctl.raw);
++ }
++ }
++ bool test_soe = false;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!(chanmask & BIT(channel)))
++ continue;
++
++ const union reut_err_ctl_reg reut_err_ctl = {
++ .raw = mchbar_read32(REUT_ch_ERR_CONTROL(channel)),
++ };
++ const uint8_t soe = reut_err_ctl.stop_on_error_control;
++ if (soe != NSOE) {
++ test_soe = true;
++ break;
++ }
++ }
++ io_reset();
++ mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) {
++ .start_test = 1,
++ .clear_errors = clear_errors && t == 0,
++ }.raw);
++ struct mono_time prev, curr;
++ timer_monotonic_get(&prev);
++ union reut_global_err_reg global_err;
++ do {
++ global_err.raw = mchbar_read32(REUT_GLOBAL_ERR);
++ /** TODO: Clean up this mess **/
++ timer_monotonic_get(&curr);
++ if (mono_time_diff_microseconds(&prev, &curr) > 1000 * 1000) {
++ mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) {
++ .stop_test = 1,
++ }.raw);
++ printk(BIOS_ERR, "REUT timed out, ch_done: %x\n",
++ global_err.ch_test_done);
++ break;
++ }
++ } while ((global_err.ch_test_done & chanmask) != chanmask);
++ if (test_soe && global_err.ch_error & chanmask)
++ break;
++ }
++ if (is_turnaround) {
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!(chanmask & BIT(channel)))
++ continue;
++
++ mchbar_write32(TC_BANK_RANK_A_ch(channel),
++ ctrl->tc_bankrank_a[channel].raw);
++ }
++ }
++ return ((union reut_global_err_reg)mchbar_read32(REUT_GLOBAL_ERR)).ch_error;
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index f8408e51a0..817a9f8bf8 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -94,20 +94,35 @@
+ #define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
+ #define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
+
++#define REUT_ch_PAT_WDB_CL_MUX_CFG(ch) _MCMAIN_C(0x4040, ch)
++
+ #define REUT_ch_PAT_WDB_CL_MUX_WR_x(ch, x) _MCMAIN_C_X(0x4048, ch, x) /* x in 0 .. 2 */
+ #define REUT_ch_PAT_WDB_CL_MUX_RD_x(ch, x) _MCMAIN_C_X(0x4054, ch, x) /* x in 0 .. 2 */
+
+ #define REUT_ch_PAT_WDB_CL_MUX_LMN(ch) _MCMAIN_C(0x4078, ch)
+
++#define REUT_ch_PAT_WDB_INV(ch) _MCMAIN_C(0x4084, ch)
++
++#define REUT_ch_ERR_CONTROL(ch) _MCMAIN_C(0x4098, ch)
++#define REUT_ch_ERR_ECC_MASK(ch) _MCMAIN_C(0x409c, ch)
++
+ #define SC_WR_ADD_DELAY_ch(ch) _MCMAIN_C(0x40d0, ch)
+
++#define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
++
+ #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
+
++#define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)
+ #define REUT_ch_PAT_CADB_MRS(ch) _MCMAIN_C(0x419c, ch)
++#define REUT_ch_PAT_CADB_MUX_CTRL(ch) _MCMAIN_C(0x41a0, ch)
++#define REUT_ch_PAT_CADB_MUX_x(ch, x) _MCMAIN_C_X(0x41a4, ch, x) /* x in 0 .. 2 */
+
++#define REUT_ch_PAT_CADB_CL_MUX_LMN(ch) _MCMAIN_C(0x41b0, ch)
+ #define REUT_ch_PAT_CADB_WRITE_PTR(ch) _MCMAIN_C(0x41bc, ch)
+ #define REUT_ch_PAT_CADB_PROG(ch) _MCMAIN_C(0x41c0, ch)
+
++#define REUT_ch_WDB_CL_CTRL(ch) _MCMAIN_C(0x4200, ch)
++
+ #define TC_ZQCAL_ch(ch) _MCMAIN_C(0x4290, ch)
+ #define TC_RFP_ch(ch) _MCMAIN_C(0x4294, ch)
+ #define TC_RFTP_ch(ch) _MCMAIN_C(0x4298, ch)
+@@ -119,12 +134,27 @@
+ #define QCLK_ch_LDAT_SDAT(ch) _MCMAIN_C(0x42d4, ch)
+ #define QCLK_ch_LDAT_DATA_IN_x(ch, x) _MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */
+
++#define REUT_GLOBAL_CTL 0x4800
+ #define REUT_GLOBAL_ERR 0x4804
+
++#define REUT_ch_SUBSEQ_x_CTL(ch, x) (0x4808 + 40 * (ch) + 4 * (x))
++
+ #define REUT_ch_SEQ_CFG(ch) (0x48a8 + 8 * (ch))
+
+ #define REUT_ch_SEQ_CTL(ch) (0x48b8 + 4 * (ch))
+
++#define REUT_ch_SEQ_ADDR_START(ch) (0x48d8 + 8 * (ch))
++
++#define REUT_ch_SEQ_ADDR_WRAP(ch) (0x48e8 + 8 * (ch))
++
++#define REUT_ch_SEQ_MISC_CTL(ch) (0x4908 + 4 * (ch))
++
++#define REUT_ch_SEQ_ADDR_INC_CTL(ch) (0x4910 + 8 * (ch))
++
++#define REUT_ch_RANK_LOG_TO_PHYS(ch) (0x4930 + 4 * (ch)) /* 4 bits per rank */
++
++#define HSW_REUT_ch_SEQ_LOOP_COUNT(ch) (0x4980 + 4 * (ch)) /* *** only on C0 *** */
++
+ /* MCMAIN broadcast */
+ #define MCSCHEDS_CBIT 0x4c20
+
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch b/config/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch
new file mode 100644
index 00000000..f433b043
--- /dev/null
+++ b/config/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch
@@ -0,0 +1,222 @@
+From 9fba0468e75877cbda62f5eaeef1946d6489a8f9 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 00:56:00 +0200
+Subject: [PATCH 19/26] haswell NRI: Add range tracking library
+
+Implement a small library used to keep track of passing ranges. This
+will be used by 1D training algorithms when margining some parameter.
+
+Change-Id: I8718e85165160afd7c0c8e730b5ce6c9c00f8a60
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../intel/haswell/native_raminit/ranges.c | 109 ++++++++++++++++++
+ .../intel/haswell/native_raminit/ranges.h | 68 +++++++++++
+ 3 files changed, 178 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/ranges.c
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/ranges.h
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 6e1b365602..2da950771d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -9,6 +9,7 @@ romstage-y += io_comp_control.c
+ romstage-y += memory_map.c
+ romstage-y += raminit_main.c
+ romstage-y += raminit_native.c
++romstage-y += ranges.c
+ romstage-y += reut.c
+ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/ranges.c b/src/northbridge/intel/haswell/native_raminit/ranges.c
+new file mode 100644
+index 0000000000..cdebc1fa66
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/ranges.c
+@@ -0,0 +1,109 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <types.h>
++
++#include "ranges.h"
++
++void linear_record_pass(
++ struct linear_train_data *const data,
++ const bool pass,
++ const int32_t value,
++ const int32_t start,
++ const int32_t step)
++{
++ /* If this is the first time, initialize all values */
++ if (value == start) {
++ /*
++ * If value passed, create a zero-length region for the current value,
++ * which may be extended as long as the successive values are passing.
++ *
++ * Otherwise, create a zero-length range for the preceding value. This
++ * range cannot be extended by other passing values, which is desired.
++ */
++ data->current.start = start - (pass ? 0 : step);
++ data->current.end = data->current.start;
++ data->largest = data->current;
++ } else if (pass) {
++ /* If this pass is not contiguous, it belongs to a new region */
++ if (data->current.end != (value - step))
++ data->current.start = value;
++
++ /* Update end of current region */
++ data->current.end = value;
++
++ /* Update largest region */
++ if (range_width(data->current) > range_width(data->largest))
++ data->largest = data->current;
++ }
++}
++
++void phase_record_pass(
++ struct phase_train_data *const data,
++ const bool pass,
++ const int32_t value,
++ const int32_t start,
++ const int32_t step)
++{
++ /* If this is the first time, initialize all values */
++ if (value == start) {
++ /*
++ * If value passed, create a zero-length region for the current value,
++ * which may be extended as long as the successive values are passing.
++ *
++ * Otherwise, create a zero-length range for the preceding value. This
++ * range cannot be extended by other passing values, which is desired.
++ */
++ data->current.start = start - (pass ? 0 : step);
++ data->current.end = data->current.start;
++ data->largest = data->current;
++ data->initial = data->current;
++ return;
++ }
++ if (!pass)
++ return;
++
++ /* Update initial region */
++ if (data->initial.end == (value - step))
++ data->initial.end = value;
++
++ /* If this pass is not contiguous, it belongs to a new region */
++ if (data->current.end != (value - step))
++ data->current.start = value;
++
++ /* Update end of current region */
++ data->current.end = value;
++
++ /* Update largest region */
++ if (range_width(data->current) > range_width(data->largest))
++ data->largest = data->current;
++}
++
++void phase_append_initial_to_current(
++ struct phase_train_data *const data,
++ const int32_t start,
++ const int32_t step)
++{
++ /* If initial region is valid and does not overlap, append it */
++ if (data->initial.start == start && data->initial.end != data->current.end)
++ data->current.end += step + range_width(data->initial);
++
++ /* Update largest region */
++ if (range_width(data->current) > range_width(data->largest))
++ data->largest = data->current;
++}
++
++void phase_append_current_to_initial(
++ struct phase_train_data *const data,
++ const int32_t start,
++ const int32_t step)
++{
++ /* If initial region is valid and does not overlap, append it */
++ if (data->initial.start == start && data->initial.end != data->current.end) {
++ data->initial.start -= (step + range_width(data->current));
++ data->current = data->initial;
++ }
++
++ /* Update largest region */
++ if (range_width(data->current) > range_width(data->largest))
++ data->largest = data->current;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/ranges.h b/src/northbridge/intel/haswell/native_raminit/ranges.h
+new file mode 100644
+index 0000000000..235392df96
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/ranges.h
+@@ -0,0 +1,68 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef HASWELL_RAMINIT_RANGES_H
++#define HASWELL_RAMINIT_RANGES_H
++
++#include <types.h>
++
++/*
++ * Many algorithms shmoo some parameter to determine the largest passing
++ * range. Provide a common implementation to avoid redundant boilerplate.
++ */
++struct passing_range {
++ int32_t start;
++ int32_t end;
++};
++
++/* Structure for linear parameters, such as roundtrip delays */
++struct linear_train_data {
++ struct passing_range current;
++ struct passing_range largest;
++};
++
++/*
++ * Phase ranges are "circular": the first and last indices are contiguous.
++ * To correctly determine the largest passing range, one has to combine
++ * the initial range and the current range when processing the last index.
++ */
++struct phase_train_data {
++ struct passing_range initial;
++ struct passing_range current;
++ struct passing_range largest;
++};
++
++static inline int32_t range_width(const struct passing_range range)
++{
++ return range.end - range.start;
++}
++
++static inline int32_t range_center(const struct passing_range range)
++{
++ return range.start + range_width(range) / 2;
++}
++
++void linear_record_pass(
++ struct linear_train_data *data,
++ bool pass,
++ int32_t value,
++ int32_t start,
++ int32_t step);
++
++void phase_record_pass(
++ struct phase_train_data *data,
++ bool pass,
++ int32_t value,
++ int32_t start,
++ int32_t step);
++
++void phase_append_initial_to_current(
++ struct phase_train_data *data,
++ int32_t start,
++ int32_t step);
++
++void phase_append_current_to_initial(
++ struct phase_train_data *data,
++ int32_t start,
++ int32_t step);
++
++#endif
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch b/config/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch
new file mode 100644
index 00000000..30926494
--- /dev/null
+++ b/config/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch
@@ -0,0 +1,294 @@
+From 54cfbe4cf53d16f747bfcfadd20445a0f5f1e5db Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 01:11:03 +0200
+Subject: [PATCH 20/26] haswell NRI: Add library to change margins
+
+Implement a library to change Rx/Tx margins. It will be expanded later.
+
+Change-Id: I0b55aba428d8b4d4e16d2fbdec57235ce3ce8adf
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/change_margin.c | 154 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_native.h | 50 ++++++
+ .../intel/haswell/registers/mchbar.h | 9 +
+ 4 files changed, 214 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/change_margin.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 2da950771d..ebe9e9b762 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,5 +1,6 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += change_margin.c
+ romstage-y += configure_mc.c
+ romstage-y += ddr3.c
+ romstage-y += jedec_reset.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/change_margin.c b/src/northbridge/intel/haswell/native_raminit/change_margin.c
+new file mode 100644
+index 0000000000..12da59580f
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/change_margin.c
+@@ -0,0 +1,154 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++
++#include "raminit_native.h"
++
++void update_rxt(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rank,
++ const uint8_t byte,
++ const enum rxt_subfield subfield,
++ const int32_t value)
++{
++ union ddr_data_rx_train_rank_reg rxt = {
++ .rcven = ctrl->rcven[channel][rank][byte],
++ .dqs_p = ctrl->rxdqsp[channel][rank][byte],
++ .rx_eq = ctrl->rx_eq[channel][rank][byte],
++ .dqs_n = ctrl->rxdqsn[channel][rank][byte],
++ .vref = ctrl->rxvref[channel][rank][byte],
++ };
++ int32_t new_value;
++ switch (subfield) {
++ case RXT_RCVEN:
++ new_value = clamp_s32(0, value, 511);
++ rxt.rcven = new_value;
++ break;
++ case RXT_RXDQS_P:
++ new_value = clamp_s32(0, value, 63);
++ rxt.dqs_p = new_value;
++ break;
++ case RXT_RX_EQ:
++ new_value = clamp_s32(0, value, 31);
++ rxt.rx_eq = new_value;
++ break;
++ case RXT_RXDQS_N:
++ new_value = clamp_s32(0, value, 63);
++ rxt.dqs_n = new_value;
++ break;
++ case RXT_RX_VREF:
++ new_value = clamp_s32(-32, value, 31);
++ rxt.vref = new_value;
++ break;
++ case RXT_RXDQS_BOTH:
++ new_value = clamp_s32(0, value, 63);
++ rxt.dqs_p = new_value;
++ rxt.dqs_n = new_value;
++ break;
++ case RXT_RESTORE:
++ new_value = value;
++ break;
++ default:
++ die("%s: Unhandled subfield index %u\n", __func__, subfield);
++ }
++
++ if (new_value != value) {
++ printk(BIOS_ERR, "%s: Overflow for subfield %u: %d ---> %d\n",
++ __func__, subfield, value, new_value);
++ }
++ mchbar_write32(RX_TRAIN_ch_r_b(channel, rank, byte), rxt.raw);
++ download_regfile(ctrl, channel, false, rank, REG_FILE_USE_RANK, byte, true, false);
++}
++
++void update_txt(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const uint8_t rank,
++ const uint8_t byte,
++ const enum txt_subfield subfield,
++ const int32_t value)
++{
++ union ddr_data_tx_train_rank_reg txt = {
++ .dq_delay = ctrl->tx_dq[channel][rank][byte],
++ .dqs_delay = ctrl->txdqs[channel][rank][byte],
++ .tx_eq = ctrl->tx_eq[channel][rank][byte],
++ };
++ int32_t new_value;
++ switch (subfield) {
++ case TXT_TX_DQ:
++ new_value = clamp_s32(0, value, 511);
++ txt.dq_delay = new_value;
++ break;
++ case TXT_TXDQS:
++ new_value = clamp_s32(0, value, 511);
++ txt.dqs_delay = new_value;
++ break;
++ case TXT_TX_EQ:
++ new_value = clamp_s32(0, value, 63);
++ txt.tx_eq = new_value;
++ break;
++ case TXT_DQDQS_OFF:
++ new_value = value;
++ txt.dqs_delay += new_value;
++ txt.dq_delay += new_value;
++ break;
++ case TXT_RESTORE:
++ new_value = value;
++ break;
++ default:
++ die("%s: Unhandled subfield index %u\n", __func__, subfield);
++ }
++ if (new_value != value) {
++ printk(BIOS_ERR, "%s: Overflow for subfield %u: %d ---> %d\n",
++ __func__, subfield, value, new_value);
++ }
++ mchbar_write32(TX_TRAIN_ch_r_b(channel, rank, byte), txt.raw);
++ download_regfile(ctrl, channel, false, rank, REG_FILE_USE_RANK, byte, false, true);
++}
++
++void download_regfile(
++ struct sysinfo *ctrl,
++ const uint8_t channel,
++ const bool multicast,
++ const uint8_t rank,
++ const enum regfile_mode regfile,
++ const uint8_t byte,
++ const bool read_rf_rd,
++ const bool read_rf_wr)
++{
++ union reut_seq_base_addr_reg reut_seq_base_addr;
++ switch (regfile) {
++ case REG_FILE_USE_START:
++ reut_seq_base_addr.raw = mchbar_read64(REUT_ch_SEQ_ADDR_START(channel));
++ break;
++ case REG_FILE_USE_CURRENT:
++ reut_seq_base_addr.raw = mchbar_read64(REUT_ch_SEQ_ADDR_CURRENT(channel));
++ break;
++ case REG_FILE_USE_RANK:
++ reut_seq_base_addr.raw = 0;
++ if (rank >= NUM_SLOTRANKS)
++ die("%s: bad rank %u\n", __func__, rank);
++ break;
++ default:
++ die("%s: Invalid regfile param %u\n", __func__, regfile);
++ }
++ uint8_t phys_rank = rank;
++ if (reut_seq_base_addr.raw != 0) {
++ /* Map REUT logical rank to physical rank */
++ const uint32_t log_to_phys = mchbar_read32(REUT_ch_RANK_LOG_TO_PHYS(channel));
++ phys_rank = log_to_phys >> (reut_seq_base_addr.rank_addr * 4) & 0x3;
++ }
++ uint32_t reg = multicast ? DDR_DATA_ch_CONTROL_0(channel) : DQ_CONTROL_0(channel, byte);
++ union ddr_data_control_0_reg ddr_data_control_0 = {
++ .raw = mchbar_read32(reg),
++ };
++ ddr_data_control_0.read_rf_rd = read_rf_rd;
++ ddr_data_control_0.read_rf_wr = read_rf_wr;
++ ddr_data_control_0.read_rf_rank = phys_rank;
++ mchbar_write32(reg, ddr_data_control_0.raw);
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 56df36ca8d..7c1a786780 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -117,6 +117,30 @@ enum test_stop {
+ ALSOE = 3, /* Stop on all lanes error */
+ };
+
++enum rxt_subfield {
++ RXT_RCVEN = 0,
++ RXT_RXDQS_P = 1,
++ RXT_RX_EQ = 2,
++ RXT_RXDQS_N = 3,
++ RXT_RX_VREF = 4,
++ RXT_RXDQS_BOTH = 5,
++ RXT_RESTORE = 255,
++};
++
++enum txt_subfield {
++ TXT_TX_DQ = 0,
++ TXT_TXDQS = 1,
++ TXT_TX_EQ = 2,
++ TXT_DQDQS_OFF = 3,
++ TXT_RESTORE = 255,
++};
++
++enum regfile_mode {
++ REG_FILE_USE_RANK, /* Used when changing parameters for each rank */
++ REG_FILE_USE_START, /* Used when changing parameters before the test */
++ REG_FILE_USE_CURRENT, /* Used when changing parameters after the test */
++};
++
+ struct wdb_pat {
+ uint32_t start_ptr; /* Starting pointer in WDB */
+ uint32_t stop_ptr; /* Stopping pointer in WDB */
+@@ -452,6 +476,32 @@ uint8_t select_reut_ranks(struct sysinfo *ctrl, uint8_t channel, uint8_t rankmas
+ void run_mpr_io_test(bool clear_errors);
+ uint8_t run_io_test(struct sysinfo *ctrl, uint8_t chanmask, uint8_t dq_pat, bool clear_errors);
+
++void update_rxt(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ uint8_t rank,
++ uint8_t byte,
++ enum rxt_subfield subfield,
++ int32_t value);
++
++void update_txt(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ uint8_t rank,
++ uint8_t byte,
++ enum txt_subfield subfield,
++ int32_t value);
++
++void download_regfile(
++ struct sysinfo *ctrl,
++ uint8_t channel,
++ bool multicast,
++ uint8_t rank,
++ enum regfile_mode regfile,
++ uint8_t byte,
++ bool read_rf_rd,
++ bool read_rf_wr);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 817a9f8bf8..a81559bb1e 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -15,7 +15,11 @@
+ /* Register definitions */
+
+ /* DDR DATA per-channel per-bytelane */
++#define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
++#define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
++
+ #define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
++#define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
+
+ /* DDR CKE per-channel */
+ #define DDR_CKE_ch_CMD_COMP_OFFSET(ch) _DDRIO_C_R_B(0x1204, ch, 0, 0)
+@@ -38,6 +42,9 @@
+ #define DDR_SCRAMBLE_ch(ch) (0x2000 + 4 * (ch))
+ #define DDR_SCRAM_MISC_CONTROL 0x2008
+
++/* DDR DATA per-channel multicast */
++#define DDR_DATA_ch_CONTROL_0(ch) _DDRIO_C_R_B(0x3074, ch, 0, 0)
++
+ /* DDR CMDN/CMDS per-channel (writes go to both CMDN and CMDS fubs) */
+ #define DDR_CMD_ch_COMP_OFFSET(ch) _DDRIO_C_R_B(0x3204, ch, 0, 0)
+ #define DDR_CMD_ch_PI_CODING(ch) _DDRIO_C_R_B(0x3208, ch, 0, 0)
+@@ -147,6 +154,8 @@
+
+ #define REUT_ch_SEQ_ADDR_WRAP(ch) (0x48e8 + 8 * (ch))
+
++#define REUT_ch_SEQ_ADDR_CURRENT(ch) (0x48f8 + 8 * (ch))
++
+ #define REUT_ch_SEQ_MISC_CTL(ch) (0x4908 + 4 * (ch))
+
+ #define REUT_ch_SEQ_ADDR_INC_CTL(ch) (0x4910 + 8 * (ch))
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch b/config/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch
new file mode 100644
index 00000000..9139a67e
--- /dev/null
+++ b/config/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch
@@ -0,0 +1,708 @@
+From ac8843553af34855d0331554c03280e66c4ea582 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 00:05:41 +0200
+Subject: [PATCH 21/26] haswell NRI: Add RcvEn training
+
+Implement the RcvEn (Receive Enable) calibration procedure.
+
+Change-Id: Ifbfa520f3e0486c56d0988ce67af2ddb9cf29888
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 14 +
+ .../haswell/native_raminit/reg_structs.h | 13 +
+ .../native_raminit/train_receive_enable.c | 561 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 3 +
+ 6 files changed, 593 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index ebe9e9b762..e2fbfb4211 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -16,3 +16,4 @@ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += testing_io.c
+ romstage-y += timings_refresh.c
++romstage-y += train_receive_enable.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 5e4674957d..7d444659c3 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -60,6 +60,7 @@ static const struct task_entry cold_boot[] = {
+ { configure_memory_map, true, "MEMMAP", },
+ { do_jedec_init, true, "JEDECINIT", },
+ { pre_training, true, "PRETRAIN", },
++ { train_receive_enable, true, "RCVET", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 7c1a786780..a36ebfacd1 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -42,6 +42,9 @@
+ #define NUM_WDB_CL_MUX_SEEDS 3
+ #define NUM_CADB_MUX_SEEDS 3
+
++/* Specified in PI ticks. 64 PI ticks == 1 qclk */
++#define tDQSCK_DRIFT 64
++
+ /* ZQ calibration types */
+ enum {
+ ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
+@@ -188,6 +191,7 @@ enum raminit_status {
+ RAMINIT_STATUS_MPLL_INIT_FAILURE,
+ RAMINIT_STATUS_POLL_TIMEOUT,
+ RAMINIT_STATUS_REUT_ERROR,
++ RAMINIT_STATUS_RCVEN_FAILURE,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -270,6 +274,10 @@ struct sysinfo {
+
+ union ddr_data_vref_adjust_reg dimm_vref;
+
++ uint8_t io_latency[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint8_t rt_latency[NUM_CHANNELS][NUM_SLOTRANKS];
++ uint32_t rt_io_comp[NUM_CHANNELS];
++
+ uint32_t data_offset_train[NUM_CHANNELS][NUM_LANES];
+ uint32_t data_offset_comp[NUM_CHANNELS][NUM_LANES];
+
+@@ -344,6 +352,11 @@ static inline void clear_data_offset_train_all(struct sysinfo *ctrl)
+ memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train));
+ }
+
++static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint8_t byte)
++{
++ return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte));
++}
++
+ /* Number of ticks to wait in units of 69.841279 ns (citation needed) */
+ static inline void tick_delay(const uint32_t delay)
+ {
+@@ -401,6 +414,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl);
+ enum raminit_status configure_mc(struct sysinfo *ctrl);
+ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
+ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
++enum raminit_status train_receive_enable(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index b943259b91..b099f4bb82 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -297,6 +297,19 @@ union ddr_scram_misc_control_reg {
+ uint32_t raw;
+ };
+
++union sc_io_latency_reg {
++ struct __packed {
++ uint32_t iolat_rank0 : 4; // Bits 3:0
++ uint32_t iolat_rank1 : 4; // Bits 7:4
++ uint32_t iolat_rank2 : 4; // Bits 11:8
++ uint32_t iolat_rank3 : 4; // Bits 15:12
++ uint32_t rt_iocomp : 6; // Bits 21:16
++ uint32_t : 9; // Bits 30:22
++ uint32_t dis_rt_clk_gate : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
+ union mcscheds_cbit_reg {
+ struct __packed {
+ uint32_t dis_opp_cas : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
+new file mode 100644
+index 0000000000..576c6bc21e
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c
+@@ -0,0 +1,561 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++#include "ranges.h"
++
++#define RCVEN_PLOT RAM_DEBUG
++
++static enum raminit_status change_rcven_timing(struct sysinfo *ctrl, const uint8_t channel)
++{
++ int16_t max_rcven = -4096;
++ int16_t min_rcven = 4096;
++ int16_t max_rcven_rank[NUM_SLOTRANKS];
++ int16_t min_rcven_rank[NUM_SLOTRANKS];
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ max_rcven_rank[rank] = max_rcven;
++ min_rcven_rank[rank] = min_rcven;
++ }
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ int16_t new_rcven = ctrl->rcven[channel][rank][byte];
++ new_rcven -= ctrl->io_latency[channel][rank] * 64;
++ if (max_rcven_rank[rank] < new_rcven)
++ max_rcven_rank[rank] = new_rcven;
++
++ if (min_rcven_rank[rank] > new_rcven)
++ min_rcven_rank[rank] = new_rcven;
++ }
++ if (max_rcven < max_rcven_rank[rank])
++ max_rcven = max_rcven_rank[rank];
++
++ if (min_rcven > min_rcven_rank[rank])
++ min_rcven = min_rcven_rank[rank];
++ }
++
++ /*
++ * Determine how far we are from the ideal center point for RcvEn timing.
++ * (PiIdeal - AveRcvEn) / 64 is the ideal number of cycles we should have
++ * for IO latency. command training will reduce this by 64, so plan for
++ * that now in the ideal value. Round to closest integer.
++ */
++ const int16_t rre_pi_ideal = 256 + 64;
++ const int16_t pi_reserve = 64;
++ const int16_t rcven_center = (max_rcven + min_rcven) / 2;
++ const int8_t iolat_target = DIV_ROUND_CLOSEST(rre_pi_ideal - rcven_center, 64);
++
++ int8_t io_g_offset = 0;
++ int8_t io_lat[NUM_SLOTRANKS] = { 0 };
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ io_lat[rank] = iolat_target;
++
++ /* Check for RcvEn underflow/overflow */
++ const int16_t rcven_lower = 64 * io_lat[rank] + min_rcven_rank[rank];
++ if (rcven_lower < pi_reserve)
++ io_lat[rank] += DIV_ROUND_UP(pi_reserve - rcven_lower, 64);
++
++ const int16_t rcven_upper = 64 * io_lat[rank] + max_rcven_rank[rank];
++ if (rcven_upper > 511 - pi_reserve)
++ io_lat[rank] -= DIV_ROUND_UP(rcven_upper - (511 - pi_reserve), 64);
++
++ /* Check for IO latency over/underflow */
++ if (io_lat[rank] - io_g_offset > 14)
++ io_g_offset = io_lat[rank] - 14;
++
++ if (io_lat[rank] - io_g_offset < 1)
++ io_g_offset = io_lat[rank] - 1;
++
++ const int8_t cycle_offset = io_lat[rank] - ctrl->io_latency[channel][rank];
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ ctrl->rcven[channel][rank][byte] += 64 * cycle_offset;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ }
++
++ /* Calculate new IO comp latency */
++ union sc_io_latency_reg sc_io_lat = {
++ .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
++ };
++
++ /* Check if we are underflowing or overflowing this field */
++ if (io_g_offset < 0 && sc_io_lat.rt_iocomp < -io_g_offset) {
++ printk(BIOS_ERR, "%s: IO COMP underflow\n", __func__);
++ printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
++ printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ if (io_g_offset > 0 && io_g_offset > 0x3f - sc_io_lat.rt_iocomp) {
++ printk(BIOS_ERR, "%s: IO COMP overflow\n", __func__);
++ printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset);
++ printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ sc_io_lat.rt_iocomp += io_g_offset;
++ ctrl->rt_io_comp[channel] = sc_io_lat.rt_iocomp;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (ctrl->rankmap[channel] & BIT(rank))
++ ctrl->io_latency[channel][rank] = io_lat[rank] - io_g_offset;
++
++ const uint8_t shift = rank * 4;
++ sc_io_lat.raw &= ~(0xf << shift);
++ sc_io_lat.raw |= ctrl->io_latency[channel][rank] << shift;
++ }
++ mchbar_write32(SC_IO_LATENCY_ch(channel), sc_io_lat.raw);
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++#define RL_START (256 + 24)
++#define RL_STOP (384 + 24)
++#define RL_STEP 8
++
++#define RE_NUM_SAMPLES 6
++
++static enum raminit_status verify_high_region(const int32_t center, const int32_t lwidth)
++{
++ if (center > RL_STOP) {
++ /* Check if center of high was found where it should be */
++ printk(BIOS_ERR, "RcvEn: Center of high (%d) higher than expected\n", center);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ if (lwidth <= 32) {
++ /* Check if width is large enough */
++ printk(BIOS_ERR, "RcvEn: Width of high region (%d) too small\n", lwidth);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ if (lwidth >= 96) {
++ /* Since we're calibrating a phase, a too large region is a problem */
++ printk(BIOS_ERR, "RcvEn: Width of high region (%d) too large\n", lwidth);
++ return RAMINIT_STATUS_RCVEN_FAILURE;
++ }
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++static void program_io_latency(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank)
++{
++ const uint8_t shift = rank * 4;
++ const uint8_t iolat = ctrl->io_latency[channel][rank];
++ mchbar_clrsetbits32(SC_IO_LATENCY_ch(channel), 0xf << shift, iolat << shift);
++}
++
++static void program_rl_delays(struct sysinfo *ctrl, const uint8_t rank, const uint16_t rl_delay)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ update_rxt(ctrl, channel, rank, byte, RXT_RCVEN, rl_delay);
++ }
++}
++
++static bool sample_dqs(const uint8_t channel, const uint8_t byte)
++{
++ return (get_data_train_feedback(channel, byte) & 0x1ff) >= BIT(RE_NUM_SAMPLES - 1);
++}
++
++enum raminit_status train_receive_enable(struct sysinfo *ctrl)
++{
++ const struct reut_box reut_addr = {
++ .col = {
++ .start = 0,
++ .stop = 1023,
++ .inc_rate = 0,
++ .inc_val = 1,
++ },
++ };
++ const struct wdb_pat wdb_pattern = {
++ .start_ptr = 0,
++ .stop_ptr = 9,
++ .inc_rate = 32,
++ .dq_pattern = BASIC_VA,
++ };
++
++ const uint16_t bytemask = BIT(ctrl->lanes) - 1;
++ const uint8_t fine_step = 1;
++
++ const uint8_t rt_delta = is_hsw_ult() ? 4 : 2;
++ const uint8_t rt_io_comp = 21 + rt_delta;
++ const uint8_t rt_latency = 16 + rt_delta;
++ setup_io_test(
++ ctrl,
++ ctrl->chanmap,
++ PAT_RD,
++ 2,
++ RE_NUM_SAMPLES + 1,
++ &reut_addr,
++ 0,
++ &wdb_pattern,
++ 0,
++ 8);
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_2_reg data_control_2 = {
++ .raw = ctrl->dq_control_2[channel][byte],
++ };
++ data_control_2.force_rx_on = 1;
++ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
++ }
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ if (ctrl->lpddr) {
++ /**
++ * W/A for b4618574 - @todo: remove for HSW ULT C0
++ * Can't have force_odt_on together with leaker, disable LPDDR
++ * mode during this training step. lpddr_mode is restored
++ * at the end of this function from the host structure.
++ */
++ data_control_0.lpddr_mode = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ }
++ data_control_0.force_odt_on = 1;
++ data_control_0.rl_training_mode = 1;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ mchbar_write32(SC_IO_LATENCY_ch(channel), (union sc_io_latency_reg) {
++ .rt_iocomp = rt_io_comp,
++ }.raw);
++ }
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!does_rank_exist(ctrl, rank))
++ continue;
++
++ /*
++ * Set initial roundtrip latency values. Assume -4 QCLK for worst board
++ * layout. This is calculated as HW_ROUNDT_LAT_DEFAULT_VALUE plus:
++ *
++ * DDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + N-mode value * 2
++ * LPDDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + tDQSCK_max
++ *
++ * N-mode is 3 during training mode. Both channels use the same timings.
++ */
++ /** TODO: differs for LPDDR **/
++ const uint32_t tmp = MAX(ctrl->multiplier, 4) + 5 + 2 * ctrl->tAA;
++ const uint32_t initial_rt_latency = MIN(rt_latency + tmp, 0x3f);
++
++ uint8_t chanmask = 0;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ ctrl->io_latency[channel][rank] = 0;
++ mchbar_write8(SC_ROUNDT_LAT_ch(channel) + rank, initial_rt_latency);
++ ctrl->rt_latency[channel][rank] = initial_rt_latency;
++ }
++
++ printk(BIOS_DEBUG, "Rank %u\n", rank);
++ printk(BIOS_DEBUG, "Steps 1 and 2: Find middle of high region\n");
++ printk(RCVEN_PLOT, "Byte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(RCVEN_PLOT, "%u ", byte);
++ }
++ printk(RCVEN_PLOT, "\nRcvEn\n");
++ struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
++ for (uint16_t rl_delay = RL_START; rl_delay < RL_STOP; rl_delay += RL_STEP) {
++ printk(RCVEN_PLOT, " % 3d", rl_delay);
++ program_rl_delays(ctrl, rank, rl_delay);
++ run_io_test(ctrl, chanmask, BASIC_VA, true);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const bool high = sample_dqs(channel, byte);
++ printk(RCVEN_PLOT, high ? ". " : "# ");
++ phase_record_pass(
++ &region_data[channel][byte],
++ high,
++ rl_delay,
++ RL_START,
++ RL_STEP);
++ }
++ }
++ printk(RCVEN_PLOT, "\n");
++ }
++ printk(RCVEN_PLOT, "\n");
++ printk(BIOS_DEBUG, "Update RcvEn timing to be in the center of high region\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\n",
++ channel, rank);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct phase_train_data *const curr_data =
++ &region_data[channel][byte];
++ phase_append_current_to_initial(curr_data, RL_START, RL_STEP);
++ const int32_t lwidth = range_width(curr_data->largest);
++ const int32_t center = range_center(curr_data->largest);
++ printk(BIOS_DEBUG, " B%u: \t%d\t%d\t%d\t%d\n",
++ byte,
++ curr_data->largest.start,
++ curr_data->largest.end,
++ lwidth,
++ center);
++
++ status = verify_high_region(center, lwidth);
++ if (status) {
++ printk(BIOS_ERR,
++ "RcvEn problems on channel %u, byte %u\n",
++ channel, byte);
++ goto clean_up;
++ }
++ ctrl->rcven[channel][rank][byte] = center;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++
++ printk(BIOS_DEBUG, "Step 3: Quarter preamble - Walk backwards\n");
++ printk(RCVEN_PLOT, "Byte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(RCVEN_PLOT, "%u ", byte);
++ }
++ printk(RCVEN_PLOT, "\nIOLAT\n");
++ bool done = false;
++ while (!done) {
++ run_io_test(ctrl, chanmask, BASIC_VA, true);
++ done = true;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, " %2u\t", ctrl->io_latency[channel][rank]);
++ uint16_t highs = 0;
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const bool high = sample_dqs(channel, byte);
++ printk(RCVEN_PLOT, high ? "H " : "L ");
++ if (high)
++ highs |= BIT(byte);
++ }
++ if (!highs)
++ continue;
++
++ done = false;
++
++ /* If all bytes sample high, adjust timing globally */
++ if (highs == bytemask && ctrl->io_latency[channel][rank] < 14) {
++ ctrl->io_latency[channel][rank] += 2;
++ ctrl->io_latency[channel][rank] %= 16;
++ program_io_latency(ctrl, channel, rank);
++ continue;
++ }
++
++ /* Otherwise, adjust individual bytes */
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ if (!(highs & BIT(byte)))
++ continue;
++
++ if (ctrl->rcven[channel][rank][byte] < 128) {
++ printk(BIOS_ERR,
++ "RcvEn underflow: walking backwards\n");
++ printk(BIOS_ERR,
++ "For channel %u, rank %u, byte %u\n",
++ channel, rank, byte);
++ status = RAMINIT_STATUS_RCVEN_FAILURE;
++ goto clean_up;
++ }
++ ctrl->rcven[channel][rank][byte] -= 128;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ }
++ printk(RCVEN_PLOT, "\n");
++ }
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "\nC%u: Preamble\n", channel);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ printk(BIOS_DEBUG,
++ " B%u: %u\n", byte, ctrl->rcven[channel][rank][byte]);
++ }
++ }
++ printk(BIOS_DEBUG, "\n");
++
++ printk(BIOS_DEBUG, "Step 4: Add 1 qclk\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ ctrl->rcven[channel][rank][byte] += 64;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ }
++ printk(BIOS_DEBUG, "\n");
++
++ printk(BIOS_DEBUG, "Step 5: Walk forward to find rising edge\n");
++ printk(RCVEN_PLOT, "Byte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RCVEN_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(RCVEN_PLOT, "%u ", byte);
++ }
++ printk(RCVEN_PLOT, "\n inc\n");
++ uint16_t ch_result[NUM_CHANNELS] = { 0 };
++ uint8_t inc_preamble[NUM_CHANNELS][NUM_LANES] = { 0 };
++ for (uint8_t inc = 0; inc < 64; inc += fine_step) {
++ printk(RCVEN_PLOT, " %2u\t", inc);
++ run_io_test(ctrl, chanmask, BASIC_VA, true);
++ done = true;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ if (ch_result[channel] & BIT(byte)) {
++ /* Skip bytes that are already done */
++ printk(RCVEN_PLOT, ". ");
++ continue;
++ }
++ const bool pass = sample_dqs(channel, byte);
++ printk(RCVEN_PLOT, pass ? ". " : "# ");
++ if (pass) {
++ ch_result[channel] |= BIT(byte);
++ continue;
++ }
++ ctrl->rcven[channel][rank][byte] += fine_step;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ inc_preamble[channel][byte] = inc;
++ }
++ printk(RCVEN_PLOT, "\t");
++ if (ch_result[channel] != bytemask)
++ done = false;
++ }
++ printk(RCVEN_PLOT, "\n");
++ if (done)
++ break;
++ }
++ printk(BIOS_DEBUG, "\n");
++ if (!done) {
++ printk(BIOS_ERR, "Error: Preamble edge not found for all bytes\n");
++ printk(BIOS_ERR, "The final RcvEn results are as follows:\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_ERR, "Channel %u Rank %u: preamble\n",
++ channel, rank);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ printk(BIOS_ERR, " Byte %u: %u%s\n", byte,
++ ctrl->rcven[channel][rank][byte],
++ (ch_result[channel] ^ bytemask) & BIT(byte)
++ ? ""
++ : " *** Check this byte! ***");
++ }
++ }
++ status = RAMINIT_STATUS_RCVEN_FAILURE;
++ goto clean_up;
++ }
++
++ printk(BIOS_DEBUG, "Step 6: center on preamble and clean up rank\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "C%u: Preamble increment\n", channel);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ /*
++ * For Traditional, pull in RcvEn by 64. For ULT, take the DQS
++ * drift into account to the specified guardband: tDQSCK_DRIFT.
++ */
++ ctrl->rcven[channel][rank][byte] -= tDQSCK_DRIFT;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ printk(BIOS_DEBUG, " B%u: %u %u\n", byte,
++ ctrl->rcven[channel][rank][byte],
++ inc_preamble[channel][byte]);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++
++clean_up:
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ if (ctrl->lpddr) {
++ /**
++ * W/A for b4618574 - @todo: remove for HSW ULT C0
++ * Can't have force_odt_on together with leaker, disable LPDDR mode for
++ * this training step. This write will disable force_odt_on while still
++ * keeping LPDDR mode disabled. Second write will restore LPDDR mode.
++ */
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.lpddr_mode = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ }
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ mchbar_write32(DQ_CONTROL_2(channel, byte),
++ ctrl->dq_control_2[channel][byte]);
++ }
++ }
++ io_reset();
++ if (status)
++ return status;
++
++ printk(BIOS_DEBUG, "Step 7: Sync IO latency across all ranks\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ status = change_rcven_timing(ctrl, channel);
++ if (status)
++ return status;
++ }
++ printk(BIOS_DEBUG, "\nFinal Receive Enable and IO latency settings:\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ const union sc_io_latency_reg sc_io_latency = {
++ .raw = mchbar_read32(SC_IO_LATENCY_ch(channel)),
++ };
++ printk(BIOS_DEBUG, " C%u.R%u: IOLAT = %u rt_iocomp = %u\n", channel,
++ rank, ctrl->io_latency[channel][rank], sc_io_latency.rt_iocomp);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ printk(BIOS_DEBUG, " B%u: %u\n", byte,
++ ctrl->rcven[channel][rank][byte]);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ }
++ return status;
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index a81559bb1e..9172d4f2b0 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -18,6 +18,8 @@
+ #define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
+ #define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
+
++#define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
++
+ #define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
+ #define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
+
+@@ -100,6 +102,7 @@
+ #define COMMAND_RATE_LIMIT_ch(ch) _MCMAIN_C(0x4010, ch)
+ #define TC_BANK_RANK_D_ch(ch) _MCMAIN_C(0x4014, ch)
+ #define SC_ROUNDT_LAT_ch(ch) _MCMAIN_C(0x4024, ch)
++#define SC_IO_LATENCY_ch(ch) _MCMAIN_C(0x4028, ch)
+
+ #define REUT_ch_PAT_WDB_CL_MUX_CFG(ch) _MCMAIN_C(0x4040, ch)
+
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch b/config/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch
new file mode 100644
index 00000000..2e6de17c
--- /dev/null
+++ b/config/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch
@@ -0,0 +1,272 @@
+From 8c3874195c0fc1af9d0b84611496689da1c19d8c Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 11:58:59 +0200
+Subject: [PATCH 22/26] haswell NRI: Add function to change margins
+
+Implement a function to change margin parameters. Haswell provides a
+register to apply an offset to margin parameters during training, so
+make use of it. There are other margin parameters that have not been
+implemented yet, as they are not needed for now and special handling
+is needed to provide offset training functionality.
+
+Change-Id: I5392380e13de3c44e77b7bc9f3b819e2661d1e2d
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../haswell/native_raminit/change_margin.c | 136 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_native.h | 39 +++++
+ .../haswell/native_raminit/reg_structs.h | 12 ++
+ .../intel/haswell/registers/mchbar.h | 1 +
+ 4 files changed, 188 insertions(+)
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/change_margin.c b/src/northbridge/intel/haswell/native_raminit/change_margin.c
+index 12da59580f..4ba9cfa5c6 100644
+--- a/src/northbridge/intel/haswell/native_raminit/change_margin.c
++++ b/src/northbridge/intel/haswell/native_raminit/change_margin.c
+@@ -1,5 +1,6 @@
+ /* SPDX-License-Identifier: GPL-2.0-or-later */
+
++#include <assert.h>
+ #include <commonlib/clamp.h>
+ #include <console/console.h>
+ #include <delay.h>
+@@ -152,3 +153,138 @@ void download_regfile(
+ ddr_data_control_0.read_rf_rank = phys_rank;
+ mchbar_write32(reg, ddr_data_control_0.raw);
+ }
++
++static void update_data_offset_train(
++ struct sysinfo *ctrl,
++ const uint8_t param,
++ const uint8_t en_multicast,
++ const uint8_t channel_in,
++ const uint8_t rank,
++ const uint8_t byte_in,
++ const bool update_ctrl,
++ const enum regfile_mode regfile,
++ const uint32_t value)
++{
++ bool is_rd = false;
++ bool is_wr = false;
++ switch (param) {
++ case RdT:
++ case RdV:
++ case RcvEna:
++ is_rd = true;
++ break;
++ case WrT:
++ case WrDqsT:
++ is_wr = true;
++ break;
++ default:
++ die("%s: Invalid margin parameter %u\n", __func__, param);
++ }
++ if (en_multicast) {
++ mchbar_write32(DDR_DATA_OFFSET_TRAIN, value);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ download_regfile(ctrl, channel, true, rank, regfile, 0, is_rd, is_wr);
++ if (update_ctrl) {
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ ctrl->data_offset_train[channel][byte] = value;
++ }
++ }
++ } else {
++ mchbar_write32(DDR_DATA_OFFSET_TRAIN_ch_b(channel_in, byte_in), value);
++ download_regfile(ctrl, channel_in, false, rank, regfile, byte_in, is_rd, is_wr);
++ if (update_ctrl)
++ ctrl->data_offset_train[channel_in][byte_in] = value;
++ }
++}
++
++static uint32_t get_max_margin(const enum margin_parameter param)
++{
++ switch (param) {
++ case RcvEna:
++ case RdT:
++ case WrT:
++ case WrDqsT:
++ return MAX_POSSIBLE_TIME;
++ case RdV:
++ return MAX_POSSIBLE_VREF;
++ default:
++ die("%s: Invalid margin parameter %u\n", __func__, param);
++ }
++}
++
++void change_margin(
++ struct sysinfo *ctrl,
++ const enum margin_parameter param,
++ const int32_t value0,
++ const bool en_multicast,
++ const uint8_t channel,
++ const uint8_t rank,
++ const uint8_t byte,
++ const bool update_ctrl,
++ const enum regfile_mode regfile)
++{
++ /** FIXME: Remove this **/
++ if (rank == 0xff)
++ die("%s: rank is 0xff\n", __func__);
++
++ if (!en_multicast && !does_ch_exist(ctrl, channel))
++ die("%s: Tried to change margin of empty channel %u\n", __func__, channel);
++
++ const uint32_t max_value = get_max_margin(param);
++ const int32_t v0 = clamp_s32(-max_value, value0, max_value);
++
++ union ddr_data_offset_train_reg ddr_data_offset_train = {
++ .raw = en_multicast ? 0 : ctrl->data_offset_train[channel][byte],
++ };
++ bool update_offset_train = false;
++ switch (param) {
++ case RcvEna:
++ ddr_data_offset_train.rcven = v0;
++ update_offset_train = true;
++ break;
++ case RdT:
++ ddr_data_offset_train.rx_dqs = v0;
++ update_offset_train = true;
++ break;
++ case WrT:
++ ddr_data_offset_train.tx_dq = v0;
++ update_offset_train = true;
++ break;
++ case WrDqsT:
++ ddr_data_offset_train.tx_dqs = v0;
++ update_offset_train = true;
++ break;
++ case RdV:
++ ddr_data_offset_train.vref = v0;
++ update_offset_train = true;
++ break;
++ default:
++ die("%s: Invalid margin parameter %u\n", __func__, param);
++ }
++ if (update_offset_train) {
++ update_data_offset_train(
++ ctrl,
++ param,
++ en_multicast,
++ channel,
++ rank,
++ byte,
++ update_ctrl,
++ regfile,
++ ddr_data_offset_train.raw);
++ }
++}
++
++void change_1d_margin_multicast(
++ struct sysinfo *ctrl,
++ const enum margin_parameter param,
++ const int32_t value0,
++ const uint8_t rank,
++ const bool update_ctrl,
++ const enum regfile_mode regfile)
++{
++ change_margin(ctrl, param, value0, true, 0, rank, 0, update_ctrl, regfile);
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index a36ebfacd1..500fc28909 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -35,6 +35,18 @@
+
+ #define RTTNOM_MASK (BIT(9) | BIT(6) | BIT(2))
+
++/* Margin parameter limits */
++#define MAX_POSSIBLE_TIME 31
++#define MAX_POSSIBLE_VREF 54
++
++#define MAX_POSSIBLE_BOTH MAX_POSSIBLE_VREF
++
++#define MIN_TIME (-MAX_POSSIBLE_TIME)
++#define MAX_TIME (MAX_POSSIBLE_TIME)
++
++#define MIN_VREF (-MAX_POSSIBLE_VREF)
++#define MAX_VREF (MAX_POSSIBLE_VREF)
++
+ #define BASIC_VA_PAT_SPREAD_8 0x01010101
+
+ #define WDB_CACHE_LINE_SIZE 8
+@@ -45,6 +57,14 @@
+ /* Specified in PI ticks. 64 PI ticks == 1 qclk */
+ #define tDQSCK_DRIFT 64
+
++enum margin_parameter {
++ RcvEna,
++ RdT,
++ WrT,
++ WrDqsT,
++ RdV,
++};
++
+ /* ZQ calibration types */
+ enum {
+ ZQ_INIT, /* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init with tZQinit */
+@@ -516,6 +536,25 @@ void download_regfile(
+ bool read_rf_rd,
+ bool read_rf_wr);
+
++void change_margin(
++ struct sysinfo *ctrl,
++ const enum margin_parameter param,
++ const int32_t value0,
++ const bool en_multicast,
++ const uint8_t channel,
++ const uint8_t rank,
++ const uint8_t byte,
++ const bool update_ctrl,
++ const enum regfile_mode regfile);
++
++void change_1d_margin_multicast(
++ struct sysinfo *ctrl,
++ const enum margin_parameter param,
++ const int32_t value0,
++ const uint8_t rank,
++ const bool update_ctrl,
++ const enum regfile_mode regfile);
++
+ uint8_t get_rx_bias(const struct sysinfo *ctrl);
+
+ uint8_t get_tCWL(uint32_t mem_clock_mhz);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index b099f4bb82..a0e36ed082 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -25,6 +25,18 @@ union ddr_data_tx_train_rank_reg {
+ uint32_t raw;
+ };
+
++union ddr_data_offset_train_reg {
++ struct __packed {
++ int32_t rcven : 6; // Bits 5:0
++ int32_t rx_dqs : 6; // Bits 11:6
++ int32_t tx_dq : 6; // Bits 17:12
++ int32_t tx_dqs : 6; // Bits 23:18
++ int32_t vref : 7; // Bits 30:24
++ int32_t : 1; // Bits 31:31
++ };
++ uint32_t raw;
++};
++
+ union ddr_data_control_0_reg {
+ struct __packed {
+ uint32_t rx_training_mode : 1; // Bits 0:0
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 9172d4f2b0..0acafbc826 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -21,6 +21,7 @@
+ #define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
+
+ #define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
++#define DDR_DATA_OFFSET_TRAIN_ch_b(ch, byte) _DDRIO_C_R_B(0x0070, ch, 0, byte)
+ #define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
+
+ /* DDR CKE per-channel */
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch b/config/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch
new file mode 100644
index 00000000..b13eb2db
--- /dev/null
+++ b/config/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch
@@ -0,0 +1,331 @@
+From 6781cec818501f7afd6ee26464fd4556ac3068cb Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 11:35:49 +0200
+Subject: [PATCH 23/26] haswell NRI: Add read MPR training
+
+Implement read training using DDR3 MPR (Multi-Purpose Register).
+
+Change-Id: Id17cb2c4c399ac9bcc937b595b58f863c152461b
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 4 +
+ .../haswell/native_raminit/train_read_mpr.c | 240 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 2 +-
+ 5 files changed, 247 insertions(+), 1 deletion(-)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/train_read_mpr.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index e2fbfb4211..c442be0728 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -16,4 +16,5 @@ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += testing_io.c
+ romstage-y += timings_refresh.c
++romstage-y += train_read_mpr.c
+ romstage-y += train_receive_enable.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 7d444659c3..264d1468f5 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -61,6 +61,7 @@ static const struct task_entry cold_boot[] = {
+ { do_jedec_init, true, "JEDECINIT", },
+ { pre_training, true, "PRETRAIN", },
+ { train_receive_enable, true, "RCVET", },
++ { train_read_mpr, true, "RDMPRT", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 500fc28909..a7551ad63c 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -27,6 +27,8 @@
+ /* Always use 12 legs for emphasis (not trained) */
+ #define TXEQFULLDRV (3 << 4)
+
++#define LOOPCOUNT_INFINITE 0xff
++
+ /* DDR3 mode register bits */
+ #define MR0_DLL_RESET BIT(8)
+
+@@ -212,6 +214,7 @@ enum raminit_status {
+ RAMINIT_STATUS_POLL_TIMEOUT,
+ RAMINIT_STATUS_REUT_ERROR,
+ RAMINIT_STATUS_RCVEN_FAILURE,
++ RAMINIT_STATUS_RMPR_FAILURE,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -435,6 +438,7 @@ enum raminit_status configure_mc(struct sysinfo *ctrl);
+ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
+ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+ enum raminit_status train_receive_enable(struct sysinfo *ctrl);
++enum raminit_status train_read_mpr(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c b/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c
+new file mode 100644
+index 0000000000..0225e1a384
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c
+@@ -0,0 +1,240 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <commonlib/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++#include "ranges.h"
++
++#define RMPR_START (-32)
++#define RMPR_STOP (32)
++#define RMPR_STEP 1
++
++#define RMPR_MIN_WIDTH 12
++
++#define RMPR_PLOT RAM_DEBUG
++
++/*
++ * Clear rx_training_mode. For LPDDR, we first need to disable odt_samp_extend_en,
++ * then disable rx_training_mode, and finally re-enable odt_samp_extend_en.
++ */
++static void clear_rx_training_mode(struct sysinfo *ctrl, const uint8_t channel)
++{
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ mchbar_write32(DQ_CONTROL_2(channel, byte), ctrl->dq_control_2[channel][byte]);
++
++ if (ctrl->lpddr) {
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = mchbar_read32(DDR_DATA_ch_CONTROL_0(channel)),
++ };
++ data_control_0.odt_samp_extend_en = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ tick_delay(1);
++ data_control_0.rx_training_mode = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ tick_delay(1);
++ }
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
++}
++
++static void set_rxdqs_edges_to_midpoint(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ update_rxt(ctrl, channel, rank, byte, RXT_RXDQS_BOTH, 32);
++ }
++ }
++}
++
++static void enter_mpr_train_ddr_mode(struct sysinfo *ctrl, const uint8_t rank)
++{
++ /* Program MR3 and mask RAS/WE to prevent scheduler from issuing non-read commands */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ if (!ctrl->lpddr)
++ reut_issue_mrs(ctrl, channel, BIT(rank), 3, 1 << 2);
++
++ union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
++ .raw = mchbar_read32(REUT_ch_MISC_ODT_CTRL(channel)),
++ };
++ reut_misc_odt_ctrl.mpr_train_ddr_on = 1;
++ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw);
++ }
++}
++
++static void leave_mpr_train_ddr_mode(struct sysinfo *ctrl, const uint8_t rank)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /*
++ * The mpr_train_ddr_on bit will force a special command.
++ * Therefore, clear it before issuing the MRS command.
++ */
++ union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
++ .raw = mchbar_read32(REUT_ch_MISC_ODT_CTRL(channel)),
++ };
++ reut_misc_odt_ctrl.mpr_train_ddr_on = 0;
++ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw);
++ if (!ctrl->lpddr)
++ reut_issue_mrs(ctrl, channel, BIT(rank), 3, 0 << 2);
++ }
++}
++
++enum raminit_status train_read_mpr(struct sysinfo *ctrl)
++{
++ set_rxdqs_edges_to_midpoint(ctrl);
++ clear_data_offset_train_all(ctrl);
++ setup_io_test_mpr(ctrl, ctrl->chanmap, LOOPCOUNT_INFINITE, NSOE);
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!does_rank_exist(ctrl, rank))
++ continue;
++
++ printk(BIOS_DEBUG, "Rank %u\n", rank);
++ printk(RMPR_PLOT, "Channel");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RMPR_PLOT, "\t%u\t\t", channel);
++ }
++ printk(RMPR_PLOT, "\nByte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RMPR_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(RMPR_PLOT, "%u ", byte);
++ }
++ enter_mpr_train_ddr_mode(ctrl, rank);
++ struct linear_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
++ select_reut_ranks(ctrl, channel, BIT(rank));
++
++ printk(RMPR_PLOT, "\nDqsDelay\n");
++ int8_t dqs_delay;
++ for (dqs_delay = RMPR_START; dqs_delay < RMPR_STOP; dqs_delay += RMPR_STEP) {
++ printk(RMPR_PLOT, "% 5d", dqs_delay);
++ const enum regfile_mode regfile = REG_FILE_USE_START;
++ change_1d_margin_multicast(ctrl, RdT, dqs_delay, 0, false, regfile);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_2_reg data_control_2 = {
++ .raw = ctrl->dq_control_2[channel][byte],
++ };
++ data_control_2.force_bias_on = 1;
++ data_control_2.force_rx_on = 1;
++ data_control_2.leaker_comp = 0;
++ mchbar_write32(DQ_CONTROL_2(channel, byte),
++ data_control_2.raw);
++ }
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.rx_training_mode = 1;
++ data_control_0.force_odt_on = !ctrl->lpddr;
++ data_control_0.en_read_preamble = 0;
++ data_control_0.odt_samp_extend_en = ctrl->lpddr;
++ const uint32_t reg_offset = DDR_DATA_ch_CONTROL_0(channel);
++ mchbar_write32(reg_offset, data_control_0.raw);
++ }
++ run_mpr_io_test(false);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(RMPR_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ uint32_t fb = get_data_train_feedback(channel, byte);
++ const bool pass = fb == 1;
++ printk(RMPR_PLOT, pass ? ". " : "# ");
++ linear_record_pass(
++ &region_data[channel][byte],
++ pass,
++ dqs_delay,
++ RMPR_START,
++ RMPR_STEP);
++ }
++ }
++ printk(RMPR_PLOT, "\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ clear_rx_training_mode(ctrl, channel);
++ }
++ io_reset();
++ }
++ printk(RMPR_PLOT, "\n");
++ leave_mpr_train_ddr_mode(ctrl, rank);
++ clear_data_offset_train_all(ctrl);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\tRxDqsPN\n",
++ channel, rank);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct linear_train_data *data = &region_data[channel][byte];
++ const int32_t lwidth = range_width(data->largest);
++ if (lwidth <= RMPR_MIN_WIDTH) {
++ printk(BIOS_ERR,
++ "Bad eye (lwidth %d <= min %d) for byte %u\n",
++ lwidth, RMPR_MIN_WIDTH, byte);
++ status = RAMINIT_STATUS_RMPR_FAILURE;
++ }
++ /*
++ * The MPR center may not be ideal on certain platforms for
++ * unknown reasons. If so, adjust it with a magical number.
++ * For Haswell, the magical number is zero. Hell knows why.
++ */
++ const int32_t center = range_center(data->largest);
++ ctrl->rxdqsp[channel][rank][byte] = center - RMPR_START;
++ ctrl->rxdqsn[channel][rank][byte] = center - RMPR_START;
++ printk(BIOS_DEBUG, " B%u: \t%d\t%d\t%d\t%d\t%u\n", byte,
++ data->largest.start, data->largest.end, lwidth,
++ center, ctrl->rxdqsp[channel][rank][byte]);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ }
++
++ /*
++ * Now program the DQS center values on populated ranks. data is taken from
++ * the host struct. We need to do it after all ranks are trained, because we
++ * need to keep the same DQS value on all ranks during the training procedure.
++ */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ }
++ change_1d_margin_multicast(ctrl, RdT, 0, 0, false, REG_FILE_USE_CURRENT);
++ io_reset();
++ return status;
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 0acafbc826..6a31d3a32c 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -122,7 +122,7 @@
+ #define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
+
+ #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
+-
++#define REUT_ch_MISC_ODT_CTRL(ch) _MCMAIN_C(0x4194, ch)
+ #define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)
+ #define REUT_ch_PAT_CADB_MRS(ch) _MCMAIN_C(0x419c, ch)
+ #define REUT_ch_PAT_CADB_MUX_CTRL(ch) _MCMAIN_C(0x41a0, ch)
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch b/config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch
new file mode 100644
index 00000000..59e9af9d
--- /dev/null
+++ b/config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch
@@ -0,0 +1,688 @@
+From 20fe4fa852d3e13851a01b51dc984ec5976c864e Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 12:56:04 +0200
+Subject: [PATCH 24/26] haswell NRI: Add write leveling
+
+Implement JEDEC write leveling, which is done in two steps. The first
+step uses the JEDEC procedure to do "fine" write leveling, i.e. align
+the DQS phase to the clock signal. The second step performs a regular
+read-write test to correct "coarse" cycle errors.
+
+Change-Id: I27678523fe22c38173a688e2a4751c259a20f009
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 10 +
+ .../train_jedec_write_leveling.c | 580 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 2 +
+ 5 files changed, 594 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index c442be0728..40c2f5e014 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -16,5 +16,6 @@ romstage-y += setup_wdb.c
+ romstage-y += spd_bitmunching.c
+ romstage-y += testing_io.c
+ romstage-y += timings_refresh.c
++romstage-y += train_jedec_write_leveling.c
+ romstage-y += train_read_mpr.c
+ romstage-y += train_receive_enable.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 264d1468f5..1ff23be615 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -62,6 +62,7 @@ static const struct task_entry cold_boot[] = {
+ { pre_training, true, "PRETRAIN", },
+ { train_receive_enable, true, "RCVET", },
+ { train_read_mpr, true, "RDMPRT", },
++ { train_jedec_write_leveling, true, "JWRL", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index a7551ad63c..666b233c45 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -59,6 +59,9 @@
+ /* Specified in PI ticks. 64 PI ticks == 1 qclk */
+ #define tDQSCK_DRIFT 64
+
++/* Maximum additional latency */
++#define MAX_ADD_DELAY 2
++
+ enum margin_parameter {
+ RcvEna,
+ RdT,
+@@ -215,6 +218,7 @@ enum raminit_status {
+ RAMINIT_STATUS_REUT_ERROR,
+ RAMINIT_STATUS_RCVEN_FAILURE,
+ RAMINIT_STATUS_RMPR_FAILURE,
++ RAMINIT_STATUS_JWRL_FAILURE,
+ RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/
+ };
+
+@@ -380,6 +384,11 @@ static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint
+ return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte));
+ }
+
++static inline uint16_t get_byte_group_errors(const uint8_t channel)
++{
++ return mchbar_read32(4 + REUT_ch_ERR_MISC_STATUS(channel)) & 0x1ff;
++}
++
+ /* Number of ticks to wait in units of 69.841279 ns (citation needed) */
+ static inline void tick_delay(const uint32_t delay)
+ {
+@@ -439,6 +448,7 @@ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
+ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+ enum raminit_status train_receive_enable(struct sysinfo *ctrl);
+ enum raminit_status train_read_mpr(struct sysinfo *ctrl);
++enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
+new file mode 100644
+index 0000000000..1ba28a3bd4
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c
+@@ -0,0 +1,580 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <console/console.h>
++#include <delay.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <types.h>
++
++#include "raminit_native.h"
++#include "ranges.h"
++
++#define JWLC_PLOT RAM_DEBUG
++#define JWRL_PLOT RAM_DEBUG
++
++static void reset_dram_dll(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank)
++{
++ reut_issue_mrs(ctrl, channel, BIT(rank), 0, ctrl->mr0[channel][rank] | MR0_DLL_RESET);
++}
++
++static void program_wdb_pattern(struct sysinfo *ctrl, const bool invert)
++{
++ /* Pattern to keep DQ-DQS simple but detect any failures. Same as NHM/WSM. */
++ const uint8_t pat[4][2] = {
++ { 0x00, 0xff },
++ { 0xff, 0x00 },
++ { 0xc3, 0x3c },
++ { 0x3c, 0xc3 },
++ };
++ const uint8_t pmask[2][8] = {
++ { 0, 0, 1, 1, 1, 1, 0, 0 },
++ { 1, 1, 0, 0, 0, 0, 1, 1 },
++ };
++ for (uint8_t s = 0; s < ARRAY_SIZE(pat); s++)
++ write_wdb_fixed_pat(ctrl, pat[s], pmask[invert], ARRAY_SIZE(pmask[invert]), s);
++}
++
++static int16_t set_add_delay(uint32_t *add_delay, uint8_t rank, int8_t target_off)
++{
++ const uint8_t shift = rank * 2;
++ if (target_off > MAX_ADD_DELAY) {
++ *add_delay &= ~(3 << shift);
++ *add_delay |= MAX_ADD_DELAY << shift;
++ return 128 * (target_off - MAX_ADD_DELAY);
++ } else if (target_off < 0) {
++ *add_delay &= ~(3 << shift);
++ *add_delay |= 0 << shift;
++ return 128 * target_off;
++ } else {
++ *add_delay &= ~(3 << shift);
++ *add_delay |= target_off << shift;
++ return 0;
++ }
++}
++
++static enum raminit_status train_jedec_write_leveling_cleanup(struct sysinfo *ctrl)
++{
++ const struct reut_box reut_addr = {
++ .col = {
++ .start = 0,
++ .stop = 1023,
++ .inc_val = 1,
++ },
++ };
++ const struct wdb_pat wdb_pattern = {
++ .start_ptr = 0,
++ .stop_ptr = 3,
++ .inc_rate = 1,
++ .dq_pattern = BASIC_VA,
++ };
++ const int8_t offsets[] = { 0, 1, -1, 2, 3 };
++ const int8_t dq_offsets[] = { 0, -10, 10, -5, 5, -15, 15 };
++ const uint8_t dq_offset_max = ARRAY_SIZE(dq_offsets);
++
++ /* Set LFSR seeds to be sequential */
++ program_wdb_lfsr(ctrl, true);
++ setup_io_test(
++ ctrl,
++ ctrl->chanmap,
++ PAT_WR_RD,
++ 2,
++ 4,
++ &reut_addr,
++ NSOE,
++ &wdb_pattern,
++ 0,
++ 0);
++
++ const union reut_pat_wdb_cl_mux_cfg_reg reut_wdb_cl_mux_cfg = {
++ .mux_0_control = REUT_MUX_BTBUFFER,
++ .mux_1_control = REUT_MUX_BTBUFFER,
++ .mux_2_control = REUT_MUX_BTBUFFER,
++ .ecc_data_source_sel = 1,
++ };
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_CFG(channel), reut_wdb_cl_mux_cfg.raw);
++ }
++
++ int8_t byte_off[NUM_CHANNELS][NUM_LANES] = { 0 };
++ uint32_t add_delay[NUM_CHANNELS] = { 0 };
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ bool invert = false;
++ const uint16_t valid_byte_mask = BIT(ctrl->lanes) - 1;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ uint8_t chanmask = 0;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
++ chanmask |= select_reut_ranks(ctrl, channel, BIT(rank));
++
++ if (!chanmask)
++ continue;
++
++ printk(BIOS_DEBUG, "Rank %u\n", rank);
++ printk(JWLC_PLOT, "Channel");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(JWLC_PLOT, "\t\t%u\t", channel);
++ }
++ printk(JWLC_PLOT, "\nByte\t");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(JWLC_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(JWLC_PLOT, "%u ", byte);
++ }
++ printk(JWLC_PLOT, "\nDelay DqOffset");
++ bool done = false;
++ int8_t byte_sum[NUM_CHANNELS] = { 0 };
++ uint16_t byte_pass[NUM_CHANNELS] = { 0 };
++ for (uint8_t off = 0; off < ARRAY_SIZE(offsets); off++) {
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ const int16_t global_byte_off =
++ set_add_delay(&add_delay[channel], rank, offsets[off]);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ update_txt(ctrl, channel, rank, byte, TXT_DQDQS_OFF,
++ global_byte_off);
++ }
++ mchbar_write32(SC_WR_ADD_DELAY_ch(channel),
++ add_delay[channel]);
++ }
++ /* Reset FIFOs and DRAM DLL (Micron workaround) */
++ if (!ctrl->lpddr) {
++ io_reset();
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ reset_dram_dll(ctrl, channel, rank);
++ }
++ udelay(1);
++ }
++ for (uint8_t dq_offset = 0; dq_offset < dq_offset_max; dq_offset++) {
++ printk(JWLC_PLOT, "\n% 3d\t% 3d",
++ offsets[off], dq_offsets[dq_offset]);
++ change_1d_margin_multicast(
++ ctrl,
++ WrT,
++ dq_offsets[dq_offset],
++ rank,
++ false,
++ REG_FILE_USE_RANK);
++
++ /*
++ * Re-program the WDB pattern. Change the pattern
++ * for the next test to avoid false pass issues.
++ */
++ program_wdb_pattern(ctrl, invert);
++ invert = !invert;
++ run_io_test(ctrl, chanmask, BASIC_VA, true);
++ done = true;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(JWLC_PLOT, "\t");
++ uint16_t result = get_byte_group_errors(channel);
++ result &= valid_byte_mask;
++
++ /* Skip bytes that have failed or already passed */
++ const uint16_t skip_me = result | byte_pass[channel];
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const bool pass = result & BIT(byte);
++ printk(JWLC_PLOT, pass ? "# " : ". ");
++ if (skip_me & BIT(byte))
++ continue;
++
++ byte_pass[channel] |= BIT(byte);
++ byte_off[channel][byte] = offsets[off];
++ byte_sum[channel] += offsets[off];
++ }
++ if (byte_pass[channel] != valid_byte_mask)
++ done = false;
++ }
++ if (done)
++ break;
++ }
++ if (done)
++ break;
++ }
++ printk(BIOS_DEBUG, "\n\n");
++ if (!done) {
++ printk(BIOS_ERR, "JWLC: Could not find a pass for all bytes\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_ERR, "Channel %u, rank %u fail:", channel, rank);
++ const uint16_t passing_mask = byte_pass[channel];
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ if (BIT(byte) & passing_mask)
++ continue;
++
++ printk(BIOS_ERR, " %u", byte);
++ }
++ printk(BIOS_ERR, "\n");
++ }
++ status = RAMINIT_STATUS_JWRL_FAILURE;
++ break;
++ }
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /* Refine target offset to make sure it works for all bytes */
++ int8_t target_off = DIV_ROUND_CLOSEST(byte_sum[channel], ctrl->lanes);
++ int16_t global_byte_off = 0;
++ uint8_t all_good_loops = 0;
++ bool all_good = 0;
++ while (!all_good) {
++ global_byte_off =
++ set_add_delay(&add_delay[channel], rank, target_off);
++ all_good = true;
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ int16_t local_offset;
++ local_offset = byte_off[channel][byte] - target_off;
++ local_offset = local_offset * 128 + global_byte_off;
++ const uint16_t tx_dq = ctrl->tx_dq[channel][rank][byte];
++ if (tx_dq + local_offset >= (512 - 64)) {
++ all_good = false;
++ all_good_loops++;
++ target_off++;
++ break;
++ }
++ const uint16_t txdqs = ctrl->tx_dq[channel][rank][byte];
++ if (txdqs + local_offset < 96) {
++ all_good = false;
++ all_good_loops++;
++ target_off--;
++ break;
++ }
++ }
++ /* Avoid an infinite loop */
++ if (all_good_loops > 3)
++ break;
++ }
++ if (!all_good) {
++ printk(BIOS_ERR, "JWLC: Target offset refining failed\n");
++ status = RAMINIT_STATUS_JWRL_FAILURE;
++ break;
++ }
++ printk(BIOS_DEBUG, "C%u.R%u: Offset\tFinalEdge\n", channel, rank);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ int16_t local_offset;
++ local_offset = byte_off[channel][byte] - target_off;
++ local_offset = local_offset * 128 + global_byte_off;
++ ctrl->tx_dq[channel][rank][byte] += local_offset;
++ ctrl->txdqs[channel][rank][byte] += local_offset;
++ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0);
++ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, local_offset,
++ ctrl->txdqs[channel][rank][byte]);
++ }
++ mchbar_write32(SC_WR_ADD_DELAY_ch(channel), add_delay[channel]);
++ if (!ctrl->lpddr) {
++ reset_dram_dll(ctrl, channel, rank);
++ udelay(1);
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++
++ /* Restore WDB after test */
++ write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0);
++ program_wdb_lfsr(ctrl, false);
++ mchbar_write32(DDR_DATA_OFFSET_TRAIN, 0);
++
++ /** TODO: Do full JEDEC init instead? **/
++ io_reset();
++ return status;
++}
++
++static enum raminit_status verify_wl_width(const int32_t lwidth)
++{
++ if (lwidth <= 32) {
++ /* Check if width is valid */
++ printk(BIOS_ERR, "WrLevel: Width region (%d) too small\n", lwidth);
++ return RAMINIT_STATUS_JWRL_FAILURE;
++ }
++ if (lwidth >= 96) {
++ /* Since we're calibrating a phase, a too large region is a problem */
++ printk(BIOS_ERR, "WrLevel: Width region (%d) too large\n", lwidth);
++ return RAMINIT_STATUS_JWRL_FAILURE;
++ }
++ return 0;
++}
++
++enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl)
++{
++ /*
++ * Enabling WL mode causes DQS to toggle for 1024 QCLK.
++ * Wait for this to stop. Round up to nearest microsecond.
++ */
++ const bool wl_long_delay = ctrl->lpddr;
++ const uint32_t dqs_toggle_time = wl_long_delay ? 2048 : 1024;
++ const uint32_t wait_time_us = DIV_ROUND_UP(ctrl->qclkps * dqs_toggle_time, 1000 * 1000);
++
++ const uint16_t wl_start = 192;
++ const uint16_t wl_stop = 192 + 128;
++ const uint16_t wl_step = 2;
++
++ /* Do not use cached MR values */
++ const bool save_restore_mrs = ctrl->restore_mrs;
++ ctrl->restore_mrs = 0;
++
++ /* Propagate delay values (without a write command) */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ /* Propagate delay values from rank 0 to prevent assertion failures in RTL */
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.read_rf_rd = 0;
++ data_control_0.read_rf_wr = 1;
++ data_control_0.read_rf_rank = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_2_reg data_control_2 = {
++ .raw = ctrl->dq_control_2[channel][byte],
++ };
++ data_control_2.force_bias_on = 1;
++ data_control_2.force_rx_on = 0;
++ data_control_2.wl_long_delay = wl_long_delay;
++ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
++ }
++ }
++
++ if (ctrl->lpddr)
++ die("%s: Missing LPDDR support\n", __func__);
++
++ if (!ctrl->lpddr)
++ ddr3_program_mr1(ctrl, 0, 1);
++
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 };
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!does_rank_exist(ctrl, rank))
++ continue;
++
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /** TODO: Differs for LPDDR **/
++ uint16_t mr1reg = ctrl->mr1[channel][rank];
++ mr1reg &= ~MR1_QOFF_ENABLE;
++ mr1reg |= MR1_WL_ENABLE;
++ if (is_hsw_ult()) {
++ mr1reg &= ~RTTNOM_MASK;
++ mr1reg |= encode_ddr3_rttnom(120);
++ } else if (ctrl->dpc[channel] == 2) {
++ mr1reg &= ~RTTNOM_MASK;
++ mr1reg |= encode_ddr3_rttnom(60);
++ }
++ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg);
++
++ /* Assert ODT for myself */
++ uint8_t odt_matrix = BIT(rank);
++ if (ctrl->dpc[channel] == 2) {
++ /* Assert ODT for non-target DIMM */
++ const uint8_t other_dimm = ((rank + 2) / 2) & 1;
++ odt_matrix |= BIT(2 * other_dimm);
++ }
++
++ union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = {
++ .raw = 0,
++ };
++ if (ctrl->lpddr) {
++ /* Only one ODT pin for ULT */
++ reut_misc_odt_ctrl.odt_on = 1;
++ reut_misc_odt_ctrl.odt_override = 1;
++ } else if (!is_hsw_ult()) {
++ reut_misc_odt_ctrl.odt_on = odt_matrix;
++ reut_misc_odt_ctrl.odt_override = 0xf;
++ }
++ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw);
++ }
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /*
++ * Enable write leveling mode in DDR and propagate delay
++ * values (without a write command). Stay in WL mode.
++ */
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.wl_training_mode = 1;
++ data_control_0.tx_pi_on = 1;
++ data_control_0.read_rf_rd = 0;
++ data_control_0.read_rf_wr = 1;
++ data_control_0.read_rf_rank = rank;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ }
++ printk(BIOS_DEBUG, "\nRank %u\n", rank);
++ printk(JWRL_PLOT, "Channel\t");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(JWRL_PLOT, "%u", channel);
++ if (channel > 0)
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(JWRL_PLOT, "\t");
++ }
++ printk(JWRL_PLOT, "\nByte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(JWRL_PLOT, "\t%u", byte);
++ }
++ printk(JWRL_PLOT, "\nWlDelay");
++ for (uint16_t wl_delay = wl_start; wl_delay < wl_stop; wl_delay += wl_step) {
++ printk(JWRL_PLOT, "\n %3u:", wl_delay);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ update_txt(ctrl, channel, rank, byte, TXT_TXDQS,
++ wl_delay);
++ }
++ }
++ /* Wait for the first burst to finish */
++ if (wl_delay == wl_start)
++ udelay(wait_time_us);
++
++ io_reset();
++ udelay(wait_time_us);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const uint32_t feedback =
++ get_data_train_feedback(channel, byte);
++ const bool pass = (feedback & 0x1ff) >= 16;
++ printk(JWRL_PLOT, "\t%c%u", pass ? '.' : '#', feedback);
++ phase_record_pass(
++ &region_data[channel][byte],
++ pass,
++ wl_delay,
++ wl_start,
++ wl_step);
++ }
++ }
++ }
++ printk(JWRL_PLOT, "\n");
++ printk(BIOS_DEBUG, "\n\tInitSt\tInitEn\tCurrSt\tCurrEn\tLargSt\tLargEn\n");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "C%u\n", channel);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct phase_train_data *data = &region_data[channel][byte];
++
++ phase_append_initial_to_current(data, wl_start, wl_step);
++ printk(BIOS_DEBUG, " B%u:\t%d\t%d\t%d\t%d\t%d\t%d\n",
++ byte,
++ data->initial.start,
++ data->initial.end,
++ data->current.start,
++ data->current.end,
++ data->largest.start,
++ data->largest.end);
++ }
++ }
++
++ /*
++ * Clean up after test. Very coarsely adjust for
++ * any cycle errors. Program values for TxDQS.
++ */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ /* Clear ODT before MRS (JEDEC spec) */
++ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), 0);
++
++ /** TODO: Differs for LPDDR **/
++ const uint16_t mr1reg = ctrl->mr1[channel][rank] | MR1_QOFF_ENABLE;
++ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg);
++
++ printk(BIOS_DEBUG, "\nC%u.R%u: LftEdge Width\n", channel, rank);
++ const bool rank_x16 = ctrl->dimms[channel][rank / 2].data.width == 16;
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct phase_train_data *data = &region_data[channel][byte];
++ const int32_t lwidth = range_width(data->largest);
++ int32_t tx_start = data->largest.start;
++ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, tx_start, lwidth);
++ status = verify_wl_width(lwidth);
++ if (status) {
++ printk(BIOS_ERR,
++ "WrLevel problems on channel %u, byte %u\n",
++ channel, byte);
++ goto clean_up;
++ }
++
++ /* Align byte pairs if DIMM is x16 */
++ if (rank_x16 && (byte & 1)) {
++ const struct phase_train_data *const ref_data =
++ &region_data[channel][byte - 1];
++
++ if (tx_start > ref_data->largest.start + 64)
++ tx_start -= 128;
++
++ if (tx_start < ref_data->largest.start - 64)
++ tx_start += 128;
++ }
++
++ /* Fix for b4618067 - need to add 1 QCLK to DQS PI */
++ if (is_hsw_ult())
++ tx_start += 64;
++
++ assert(tx_start >= 0);
++ ctrl->txdqs[channel][rank][byte] = tx_start;
++ ctrl->tx_dq[channel][rank][byte] = tx_start + 32;
++ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0);
++ }
++ }
++ printk(BIOS_DEBUG, "\n");
++ }
++
++clean_up:
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ mchbar_write32(DQ_CONTROL_2(channel, byte),
++ ctrl->dq_control_2[channel][byte]);
++ }
++ }
++ if (!ctrl->lpddr)
++ ddr3_program_mr1(ctrl, 0, 0);
++
++ ctrl->restore_mrs = save_restore_mrs;
++
++ if (status)
++ return status;
++
++ /** TODO: If this step fails and dec_wrd is set, clear it and try again **/
++ return train_jedec_write_leveling_cleanup(ctrl);
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 6a31d3a32c..7c0b5a49de 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -121,6 +121,8 @@
+
+ #define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch)
+
++#define REUT_ch_ERR_MISC_STATUS(ch) _MCMAIN_C(0x40e8, ch)
++
+ #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch)
+ #define REUT_ch_MISC_ODT_CTRL(ch) _MCMAIN_C(0x4194, ch)
+ #define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch)
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch b/config/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch
new file mode 100644
index 00000000..d15ea5d1
--- /dev/null
+++ b/config/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch
@@ -0,0 +1,570 @@
+From d041b14f3af69db5f4598c84e3f53c9cd572ffb5 Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Sun, 8 May 2022 14:29:05 +0200
+Subject: [PATCH 25/26] haswell NRI: Add final raminit steps
+
+Implement the remaining raminit steps. Although many training steps are
+missing, this is enough to boot on the Asrock B85M Pro4.
+
+Change-Id: I94f3b65f0218d4da4fda4d84592dfd91f77f8f21
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ src/northbridge/intel/haswell/Kconfig | 4 +-
+ .../intel/haswell/native_raminit/Makefile.inc | 1 +
+ .../haswell/native_raminit/activate_mc.c | 388 ++++++++++++++++++
+ .../haswell/native_raminit/raminit_main.c | 5 +-
+ .../haswell/native_raminit/raminit_native.c | 5 +-
+ .../haswell/native_raminit/raminit_native.h | 2 +
+ .../haswell/native_raminit/reg_structs.h | 12 +
+ .../intel/haswell/registers/mchbar.h | 7 +
+ 8 files changed, 416 insertions(+), 8 deletions(-)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/activate_mc.c
+
+diff --git a/src/northbridge/intel/haswell/Kconfig b/src/northbridge/intel/haswell/Kconfig
+index b659bf6d98..61f2a3c64c 100644
+--- a/src/northbridge/intel/haswell/Kconfig
++++ b/src/northbridge/intel/haswell/Kconfig
+@@ -10,12 +10,12 @@ config NORTHBRIDGE_INTEL_HASWELL
+ if NORTHBRIDGE_INTEL_HASWELL
+
+ config USE_NATIVE_RAMINIT
+- bool "[NOT WORKING] Use native raminit"
++ bool "[NOT COMPLETE] Use native raminit"
+ default n
+ select HAVE_DEBUG_RAM_SETUP
+ help
+ Select if you want to use coreboot implementation of raminit rather than
+- MRC.bin. Currently incomplete and does not boot.
++ MRC.bin. Currently incomplete and does not support S3 resume.
+
+ config HASWELL_VBOOT_IN_BOOTBLOCK
+ depends on VBOOT
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+index 40c2f5e014..d97da72890 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc
+@@ -1,5 +1,6 @@
+ ## SPDX-License-Identifier: GPL-2.0-or-later
+
++romstage-y += activate_mc.c
+ romstage-y += change_margin.c
+ romstage-y += configure_mc.c
+ romstage-y += ddr3.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/activate_mc.c b/src/northbridge/intel/haswell/native_raminit/activate_mc.c
+new file mode 100644
+index 0000000000..78a7ad27ef
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/activate_mc.c
+@@ -0,0 +1,388 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <console/console.h>
++#include <delay.h>
++#include <device/pci_ops.h>
++#include <northbridge/intel/haswell/haswell.h>
++#include <timer.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++static void update_internal_clocks_on(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ bool clocks_on = false;
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const union ddr_data_control_1_reg data_control_1 = {
++ .raw = ctrl->dq_control_1[channel][byte],
++ };
++ const int8_t o_on = data_control_1.odt_delay;
++ const int8_t s_on = data_control_1.sense_amp_delay;
++ const int8_t o_off = data_control_1.odt_duration;
++ const int8_t s_off = data_control_1.sense_amp_duration;
++ if (o_on + o_off >= 7 || s_on + s_off >= 7) {
++ clocks_on = true;
++ break;
++ }
++ }
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.internal_clocks_on = clocks_on;
++ ctrl->dq_control_0[channel] = data_control_0.raw;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ }
++}
++
++/* Switch off unused segments of the SDLL to save power */
++static void update_sdll_length(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ uint8_t max_pi = 0;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ const uint8_t rx_dqs_p = ctrl->rxdqsp[channel][rank][byte];
++ const uint8_t rx_dqs_n = ctrl->rxdqsn[channel][rank][byte];
++ max_pi = MAX(max_pi, MAX(rx_dqs_p, rx_dqs_n));
++ }
++ /* Update SDLL length for power savings */
++ union ddr_data_control_1_reg data_control_1 = {
++ .raw = ctrl->dq_control_1[channel][byte],
++ };
++ /* Calculate which segments to turn off */
++ data_control_1.sdll_segment_disable = (7 - (max_pi >> 3)) & ~1;
++ ctrl->dq_control_1[channel][byte] = data_control_1.raw;
++ mchbar_write32(DQ_CONTROL_1(channel, byte), data_control_1.raw);
++ }
++ }
++}
++
++static void set_rx_clk_stg_num(struct sysinfo *ctrl, const uint8_t channel)
++{
++ const uint8_t rcven_drift = ctrl->lpddr ? DIV_ROUND_UP(tDQSCK_DRIFT, ctrl->qclkps) : 1;
++ uint8_t max_rcven = 0;
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ max_rcven = MAX(max_rcven, ctrl->rcven[channel][rank][byte] / 64);
++ }
++ const union ddr_data_control_1_reg ddr_data_control_1 = {
++ .raw = ctrl->dq_control_1[channel][0],
++ };
++ const bool lpddr_long_odt = ddr_data_control_1.lpddr_long_odt_en;
++ const uint8_t rcven_turnoff = max_rcven + 18 + 2 * rcven_drift + lpddr_long_odt;
++ const union ddr_data_control_0_reg ddr_data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_2_reg ddr_data_control_2 = {
++ .raw = ctrl->dq_control_2[channel][byte],
++ };
++ if (ddr_data_control_0.odt_samp_extend_en) {
++ if (ddr_data_control_2.rx_clk_stg_num < rcven_turnoff)
++ ddr_data_control_2.rx_clk_stg_num = rcven_turnoff;
++ } else {
++ const int8_t o_on = ddr_data_control_1.odt_delay;
++ const int8_t o_off = ddr_data_control_1.odt_duration;
++ ddr_data_control_2.rx_clk_stg_num = MAX(17, o_on + o_off + 14);
++ }
++ ctrl->dq_control_2[channel][byte] = ddr_data_control_2.raw;
++ mchbar_write32(DQ_CONTROL_2(channel, byte), ddr_data_control_2.raw);
++ }
++}
++
++#define SELF_REFRESH_IDLE_COUNT 0x200
++
++static void enter_sr(void)
++{
++ mchbar_write32(PM_SREF_CONFIG, SELF_REFRESH_IDLE_COUNT | BIT(16));
++ udelay(1);
++}
++
++enum power_down_mode {
++ PDM_NO_PD = 0,
++ PDM_APD = 1,
++ PDM_PPD = 2,
++ PDM_PPD_DLL_OFF = 6,
++};
++
++static void power_down_config(struct sysinfo *ctrl)
++{
++ const enum power_down_mode pd_mode = ctrl->lpddr ? PDM_PPD : PDM_PPD_DLL_OFF;
++ mchbar_write32(PM_PDWN_CONFIG, pd_mode << 12 | 0x40);
++}
++
++static void train_power_modes_post(struct sysinfo *ctrl)
++{
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ /* Adjust tCPDED and tPRPDEN */
++ if (ctrl->mem_clock_mhz >= 933)
++ ctrl->tc_bankrank_d[channel].tCPDED = 2;
++
++ if (ctrl->mem_clock_mhz >= 1066)
++ ctrl->tc_bankrank_d[channel].tPRPDEN = 2;
++
++ mchbar_write32(TC_BANK_RANK_D_ch(channel), ctrl->tc_bankrank_d[channel].raw);
++ }
++ power_down_config(ctrl);
++ mchbar_write32(MCDECS_CBIT, BIT(30)); /* dis_msg_clk_gate */
++}
++
++static uint8_t compute_burst_end_odt_delay(const struct sysinfo *const ctrl)
++{
++ /* Must be disabled for LPDDR */
++ if (ctrl->lpddr)
++ return 0;
++
++ const uint8_t beod = MIN(7, DIV_ROUND_CLOSEST(14300 * 20 / 100, ctrl->qclkps));
++ if (beod < 3)
++ return 0;
++
++ if (beod < 4)
++ return 4;
++
++ return beod;
++}
++
++static void program_burst_end_odt_delay(struct sysinfo *ctrl)
++{
++ /* Program burst_end_odt_delay - it should be zero during training steps */
++ const uint8_t beod = compute_burst_end_odt_delay(ctrl);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ union ddr_data_control_1_reg ddr_data_control_1 = {
++ .raw = ctrl->dq_control_1[channel][byte],
++ };
++ ddr_data_control_1.burst_end_odt_delay = beod;
++ ctrl->dq_control_1[channel][byte] = ddr_data_control_1.raw;
++ mchbar_write32(DQ_CONTROL_1(channel, byte), ddr_data_control_1.raw);
++ }
++ }
++}
++
++/*
++ * Return a random value to use for scrambler seeds. Try to use RDRAND
++ * first and fall back to hardcoded values if RDRAND does not succeed.
++ */
++static uint16_t get_random_number(const uint8_t channel)
++{
++ /* The RDRAND instruction is only available 100k cycles after reset */
++ for (size_t i = 0; i < 100000; i++) {
++ uint32_t status;
++ uint32_t random;
++ /** TODO: Clean up asm **/
++ __asm__ __volatile__(
++ "\n\t .byte 0x0F, 0xC7, 0xF0"
++ "\n\t movl %%eax, %0"
++ "\n\t pushf"
++ "\n\t pop %%eax"
++ "\n\t movl %%eax, %1"
++ : "=m"(random),
++ "=m"(status)
++ : /* No inputs */
++ : "eax", "cc");
++
++ /* Only consider non-zero random values as valid */
++ if (status & 1 && random)
++ return random;
++ }
++
++ /* https://xkcd.com/221 */
++ if (channel)
++ return 0x28f4;
++ else
++ return 0x893e;
++}
++
++/* Work around "error: 'typeof' applied to a bit-field" */
++static inline uint32_t max(const uint32_t a, const uint32_t b)
++{
++ return MAX(a, b);
++}
++
++enum raminit_status activate_mc(struct sysinfo *ctrl)
++{
++ const bool enable_scrambling = true;
++ const bool enable_cmd_tristate = true;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ if (enable_scrambling && ctrl->stepping < STEPPING_C0) {
++ /* Make sure tRDRD_(sr, dr, dd) are at least 6 for scrambler W/A */
++ union tc_bank_rank_a_reg tc_bank_rank_a = {
++ .raw = mchbar_read32(TC_BANK_RANK_A_ch(channel)),
++ };
++ tc_bank_rank_a.tRDRD_sr = max(tc_bank_rank_a.tRDRD_sr, 6);
++ tc_bank_rank_a.tRDRD_dr = max(tc_bank_rank_a.tRDRD_dr, 6);
++ tc_bank_rank_a.tRDRD_dd = max(tc_bank_rank_a.tRDRD_dd, 6);
++ mchbar_write32(TC_BANK_RANK_A_ch(channel), tc_bank_rank_a.raw);
++ }
++ if (enable_scrambling) {
++ const union ddr_scramble_reg ddr_scramble = {
++ .scram_key = get_random_number(channel),
++ .scram_en = 1,
++ };
++ mchbar_write32(DDR_SCRAMBLE_ch(channel), ddr_scramble.raw);
++ }
++ if (ctrl->tCMD == 1) {
++ /* If we are in 1N mode, enable and set command rate limit to 3 */
++ union mcmain_command_rate_limit_reg cmd_rate_limit = {
++ .raw = mchbar_read32(COMMAND_RATE_LIMIT_ch(channel)),
++ };
++ cmd_rate_limit.enable_cmd_limit = 1;
++ cmd_rate_limit.cmd_rate_limit = 3;
++ mchbar_write32(COMMAND_RATE_LIMIT_ch(channel), cmd_rate_limit.raw);
++ }
++ if (enable_cmd_tristate) {
++ /* Enable command tri-state at the end of training */
++ union tc_bank_rank_a_reg tc_bank_rank_a = {
++ .raw = mchbar_read32(TC_BANK_RANK_A_ch(channel)),
++ };
++ tc_bank_rank_a.cmd_3st_dis = 0;
++ mchbar_write32(TC_BANK_RANK_A_ch(channel), tc_bank_rank_a.raw);
++ }
++ /* Set MC to normal mode and clean the ODT and CKE */
++ mchbar_write32(REUT_ch_SEQ_CFG(channel), REUT_MODE_NOP << 12);
++ /* Set again the rank occupancy */
++ mchbar_write8(MC_INIT_STATE_ch(channel), ctrl->rankmap[channel]);
++ if (ctrl->is_ecc) {
++ /* Enable ECC I/O and logic */
++ union mad_dimm_reg mad_dimm = {
++ .raw = mchbar_read32(MAD_DIMM(channel)),
++ };
++ mad_dimm.ecc_mode = 3;
++ mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
++ }
++ }
++
++ if (!is_hsw_ult())
++ update_internal_clocks_on(ctrl);
++
++ update_sdll_length(ctrl);
++
++ program_burst_end_odt_delay(ctrl);
++
++ if (is_hsw_ult()) {
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ set_rx_clk_stg_num(ctrl, channel);
++ }
++ /** TODO: Program DDRPL_CR_DDR_TX_DELAY if Memory Trace is enabled **/
++ }
++
++ /* Enable periodic COMP */
++ mchbar_write32(M_COMP, (union pcu_comp_reg) {
++ .comp_interval = COMP_INT,
++ }.raw);
++
++ /* Enable the power mode before PCU starts working */
++ train_power_modes_post(ctrl);
++
++ /* Set idle timer and self refresh enable bits */
++ enter_sr();
++
++ /** FIXME: Do not hardcode power weights and RAPL settings **/
++ mchbar_write32(0x5888, 0x00000d0d);
++ mchbar_write32(0x5884, 0x00000004); /* 58.2 pJ */
++
++ mchbar_write32(0x58e0, 0);
++ mchbar_write32(0x58e4, 0);
++
++ mchbar_write32(0x5890, 0xffff);
++ mchbar_write32(0x5894, 0xffff);
++ mchbar_write32(0x5898, 0xffff);
++ mchbar_write32(0x589c, 0xffff);
++ mchbar_write32(0x58d0, 0xffff);
++ mchbar_write32(0x58d4, 0xffff);
++ mchbar_write32(0x58d8, 0xffff);
++ mchbar_write32(0x58dc, 0xffff);
++
++ /* Overwrite thermal parameters */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ mchbar_write32(_MCMAIN_C(0x42ec, channel), 0x0000000f);
++ mchbar_write32(_MCMAIN_C(0x42f0, channel), 0x00000009);
++ mchbar_write32(_MCMAIN_C(0x42f4, channel), 0x00000093);
++ mchbar_write32(_MCMAIN_C(0x42f8, channel), 0x00000087);
++ mchbar_write32(_MCMAIN_C(0x42fc, channel), 0x000000de);
++
++ /** TODO: Differs for LPDDR **/
++ mchbar_write32(PM_THRT_CKE_MIN_ch(channel), 0x30);
++ }
++ mchbar_write32(PCU_DDR_PTM_CTL, 0x40);
++ return RAMINIT_STATUS_SUCCESS;
++}
++
++static void mc_lockdown(void)
++{
++ /* Lock memory controller registers */
++ mchbar_write32(MC_LOCK, 0x8f);
++
++ /* MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK is set when programming the memory map */
++
++ /* Lock memory map registers */
++ pci_or_config16(HOST_BRIDGE, GGC, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, DPR, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, MESEG_LIMIT, 1 << 10);
++ pci_or_config32(HOST_BRIDGE, REMAPBASE, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, REMAPLIMIT, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, TOM, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, TOUUD, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, BDSM, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, BGSM, 1 << 0);
++ pci_or_config32(HOST_BRIDGE, TOLUD, 1 << 0);
++}
++
++enum raminit_status raminit_done(struct sysinfo *ctrl)
++{
++ union mc_init_state_g_reg mc_init_state_g = {
++ .raw = mchbar_read32(MC_INIT_STATE_G),
++ };
++ mc_init_state_g.refresh_enable = 1;
++ mc_init_state_g.pu_mrc_done = 1;
++ mc_init_state_g.mrc_done = 1;
++ mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw);
++
++ /* Lock the memory controller to enable normal operation */
++ mc_lockdown();
++
++ /* Poll for mc_init_done_ack to make sure memory initialization is complete */
++ printk(BIOS_DEBUG, "Waiting for mc_init_done acknowledgement... ");
++
++ struct stopwatch timer;
++ stopwatch_init_msecs_expire(&timer, 2000);
++ do {
++ mc_init_state_g.raw = mchbar_read32(MC_INIT_STATE_G);
++
++ /* DRAM will NOT work without the acknowledgement. There is no hope. */
++ if (stopwatch_expired(&timer))
++ die("\nTimed out waiting for mc_init_done acknowledgement\n");
++
++ } while (mc_init_state_g.mc_init_done_ack == 0);
++ printk(BIOS_DEBUG, "DONE!\n");
++
++ /* Provide some data for the graphics driver. Yes, it's hardcoded. */
++ mchbar_write32(SSKPD + 0, 0x05a2404f);
++ mchbar_write32(SSKPD + 4, 0x140000a0);
++ return RAMINIT_STATUS_SUCCESS;
++}
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 1ff23be615..3a65fb01fb 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -63,6 +63,8 @@ static const struct task_entry cold_boot[] = {
+ { train_receive_enable, true, "RCVET", },
+ { train_read_mpr, true, "RDMPRT", },
+ { train_jedec_write_leveling, true, "JWRL", },
++ { activate_mc, true, "ACTIVATE", },
++ { raminit_done, true, "RAMINITEND", },
+ };
+
+ /* Return a generic stepping value to make stepping checks simpler */
+@@ -143,7 +145,4 @@ void raminit_main(const enum raminit_boot_mode bootmode)
+
+ if (status != RAMINIT_STATUS_SUCCESS)
+ die("Memory initialization was met with utmost failure and misery\n");
+-
+- /** TODO: Implement the required magic **/
+- die("NATIVE RAMINIT: More Magic (tm) required.\n");
+ }
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+index bd9bc8e692..1ea729b23d 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c
+@@ -200,8 +200,6 @@ void perform_raminit(const int s3resume)
+ else
+ me_status = ME_INIT_STATUS_SUCCESS;
+
+- /** TODO: Remove this once raminit is implemented **/
+- me_status = ME_INIT_STATUS_ERROR;
+ intel_early_me_init_done(me_status);
+ }
+
+@@ -217,7 +215,8 @@ void perform_raminit(const int s3resume)
+ }
+
+ /* Save training data on non-S3 resumes */
+- if (!s3resume)
++ /** TODO: Enable this once training data is populated **/
++ if (0 && !s3resume)
+ save_mrc_data(&md);
+
+ /** TODO: setup_sdram_meminfo **/
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 666b233c45..98e39cb76e 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -449,6 +449,8 @@ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
+ enum raminit_status train_receive_enable(struct sysinfo *ctrl);
+ enum raminit_status train_read_mpr(struct sysinfo *ctrl);
+ enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
++enum raminit_status activate_mc(struct sysinfo *ctrl);
++enum raminit_status raminit_done(struct sysinfo *ctrl);
+
+ void configure_timings(struct sysinfo *ctrl);
+ void configure_refresh(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+index a0e36ed082..0d9aaa1f7c 100644
+--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+@@ -294,6 +294,18 @@ union ddr_cke_ctl_controls_reg {
+ uint32_t raw;
+ };
+
++union ddr_scramble_reg {
++ struct __packed {
++ uint32_t scram_en : 1; // Bits 0:0
++ uint32_t scram_key : 16; // Bits 16:1
++ uint32_t clk_gate_ab : 2; // Bits 18:17
++ uint32_t clk_gate_c : 2; // Bits 20:19
++ uint32_t en_dbi_ab : 1; // Bits 21:21
++ uint32_t : 10; // Bits 31:17
++ };
++ uint32_t raw;
++};
++
+ union ddr_scram_misc_control_reg {
+ struct __packed {
+ uint32_t wl_wake_cycles : 2; // Bits 1:0
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 7c0b5a49de..49a215aa71 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -20,6 +20,7 @@
+
+ #define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
+
++#define DQ_CONTROL_1(ch, byte) _DDRIO_C_R_B(0x0060, ch, 0, byte)
+ #define DQ_CONTROL_2(ch, byte) _DDRIO_C_R_B(0x0064, ch, 0, byte)
+ #define DDR_DATA_OFFSET_TRAIN_ch_b(ch, byte) _DDRIO_C_R_B(0x0070, ch, 0, byte)
+ #define DQ_CONTROL_0(ch, byte) _DDRIO_C_R_B(0x0074, ch, 0, byte)
+@@ -147,6 +148,8 @@
+ #define QCLK_ch_LDAT_SDAT(ch) _MCMAIN_C(0x42d4, ch)
+ #define QCLK_ch_LDAT_DATA_IN_x(ch, x) _MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */
+
++#define PM_THRT_CKE_MIN_ch(ch) _MCMAIN_C(0x4328, ch)
++
+ #define REUT_GLOBAL_CTL 0x4800
+ #define REUT_GLOBAL_ERR 0x4804
+
+@@ -175,6 +178,8 @@
+
+ #define MCSCHEDS_DFT_MISC 0x4c30
+
++#define PM_PDWN_CONFIG 0x4cb0
++
+ #define REUT_ERR_DATA_STATUS 0x4ce0
+
+ #define REUT_MISC_CKE_CTRL 0x4d90
+@@ -186,8 +191,10 @@
+ #define MAD_CHNL 0x5000 /* Address Decoder Channel Configuration */
+ #define MAD_DIMM(ch) (0x5004 + (ch) * 4)
+ #define MAD_ZR 0x5014
++#define MCDECS_CBIT 0x501c
+ #define MC_INIT_STATE_G 0x5030
+ #define MRC_REVISION 0x5034 /* MRC Revision */
++#define PM_SREF_CONFIG 0x5060
+
+ #define RCOMP_TIMER 0x5084
+
+--
+2.39.2
+
diff --git a/config/coreboot/haswell/patches/0026-Remove-warning-for-coreboot-images-built-without-a-p.patch b/config/coreboot/haswell/patches/0026-Remove-warning-for-coreboot-images-built-without-a-p.patch
new file mode 100644
index 00000000..547c6392
--- /dev/null
+++ b/config/coreboot/haswell/patches/0026-Remove-warning-for-coreboot-images-built-without-a-p.patch
@@ -0,0 +1,38 @@
+From 1ce4f118b024a6367382b46016781f30fe622e3e Mon Sep 17 00:00:00 2001
+From: Nicholas Chin <nic.c3.14@gmail.com>
+Date: Fri, 12 May 2023 19:55:15 -0600
+Subject: [PATCH] Remove warning for coreboot images built without a payload
+
+I added this in upstream to prevent people from accidentally flashing
+roms without a payload resulting in a no boot situation, but in
+libreboot lbmk handles the payload and thus this warning always comes
+up. This has caused confusion and concern so just patch it out.
+---
+ payloads/Makefile.inc | 13 +------------
+ 1 file changed, 1 insertion(+), 12 deletions(-)
+
+diff --git a/payloads/Makefile.inc b/payloads/Makefile.inc
+index e735443a76..4f1692a873 100644
+--- a/payloads/Makefile.inc
++++ b/payloads/Makefile.inc
+@@ -49,16 +49,5 @@ distclean-payloads:
+ print-repo-info-payloads:
+ -$(foreach payload, $(PAYLOADS_LIST), $(MAKE) -C $(payload) print-repo-info 2>/dev/null; )
+
+-ifeq ($(CONFIG_PAYLOAD_NONE),y)
+-files_added:: warn_no_payload
+-endif
+-
+-warn_no_payload:
+- printf "\n\t** WARNING **\n"
+- printf "coreboot has been built without a payload. Writing\n"
+- printf "a coreboot image without a payload to your board's\n"
+- printf "flash chip will result in a non-booting system. You\n"
+- printf "can use cbfstool to add a payload to the image.\n\n"
+-
+ .PHONY: force-payload coreinfo nvramcui
+-.PHONY: clean-payloads distclean-payloads print-repo-info-payloads warn_no_payload
++.PHONY: clean-payloads distclean-payloads print-repo-info-payloads
+--
+2.40.1
+
diff --git a/config/coreboot/haswell/patches/0027-coreboot-haswell-fix-acpica-downloads.patch b/config/coreboot/haswell/patches/0027-coreboot-haswell-fix-acpica-downloads.patch
new file mode 100644
index 00000000..292d60e9
--- /dev/null
+++ b/config/coreboot/haswell/patches/0027-coreboot-haswell-fix-acpica-downloads.patch
@@ -0,0 +1,30 @@
+From 29c1116ebd5879568010a8386e4838294a78b408 Mon Sep 17 00:00:00 2001
+From: Leah Rowe <leah@libreboot.org>
+Date: Sun, 16 Jul 2023 03:48:23 +0100
+Subject: [PATCH 1/1] coreboot/haswell: fix acpica downloads
+
+the upstream link died. i now host the relevant acpica
+tarball myself, on libreboot rsync. this patch makes
+coreboot crossgcc use that
+
+Signed-off-by: Leah Rowe <leah@libreboot.org>
+---
+ util/crossgcc/buildgcc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/util/crossgcc/buildgcc b/util/crossgcc/buildgcc
+index 3c4b10cc92..0c4262b7b1 100755
+--- a/util/crossgcc/buildgcc
++++ b/util/crossgcc/buildgcc
+@@ -52,7 +52,7 @@ MPFR_ARCHIVE="https://ftpmirror.gnu.org/mpfr/mpfr-${MPFR_VERSION}.tar.xz"
+ MPC_ARCHIVE="https://ftpmirror.gnu.org/mpc/mpc-${MPC_VERSION}.tar.gz"
+ GCC_ARCHIVE="https://ftpmirror.gnu.org/gcc/gcc-${GCC_VERSION}/gcc-${GCC_VERSION}.tar.xz"
+ BINUTILS_ARCHIVE="https://ftpmirror.gnu.org/binutils/binutils-${BINUTILS_VERSION}.tar.xz"
+-IASL_ARCHIVE="https://acpica.org/sites/acpica/files/acpica-unix2-${IASL_VERSION}.tar.gz"
++IASL_ARCHIVE="https://mirror.math.princeton.edu/pub/libreboot/misc/acpica/acpica-unix2-${IASL_VERSION}.tar.gz"
+ # CLANG toolchain archive locations
+ LLVM_ARCHIVE="https://github.com/llvm/llvm-project/releases/download/llvmorg-${CLANG_VERSION}/llvm-${CLANG_VERSION}.src.tar.xz"
+ CLANG_ARCHIVE="https://github.com/llvm/llvm-project/releases/download/llvmorg-${CLANG_VERSION}/clang-${CLANG_VERSION}.src.tar.xz"
+--
+2.40.1
+