diff options
Diffstat (limited to 'config/coreboot/haswell')
28 files changed, 12163 insertions, 0 deletions
| diff --git a/config/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch b/config/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch new file mode 100644 index 00000000..96e4c14d --- /dev/null +++ b/config/coreboot/haswell/patches/0001-commonlib-clamp.h-Add-more-clamping-functions.patch @@ -0,0 +1,54 @@ +From dd58f5e9108bc596c93071705d2b53233d13ade6 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 20:36:10 +0200 +Subject: [PATCH 01/26] commonlib/clamp.h: Add more clamping functions + +Add more clamping functions that work with different types. + +Change-Id: I14cf335d5a54f769f8fd9184450957e876affd6b +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + src/commonlib/include/commonlib/clamp.h | 26 +++++++++++++++++-------- + 1 file changed, 18 insertions(+), 8 deletions(-) + +diff --git a/src/commonlib/include/commonlib/clamp.h b/src/commonlib/include/commonlib/clamp.h +index e01a107ed4..526185195c 100644 +--- a/src/commonlib/include/commonlib/clamp.h ++++ b/src/commonlib/include/commonlib/clamp.h +@@ -8,15 +8,25 @@ + /* +  * Clamp a value, so that it is between a lower and an upper bound. 
+  */ +-static inline u32 clamp_u32(const u32 min, const u32 val, const u32 max) +-{ +-	if (val > max) +-		return max; ++#define __MAKE_CLAMP_FUNC(type) \ ++	static inline type clamp_##type(const type min, const type val, const type max) \ ++	{				\ ++		if (val > max)		\ ++			return max;	\ ++		if (val < min)		\ ++			return min;	\ ++		return val;		\ ++	}				\ +  +-	if (val < min) +-		return min; ++__MAKE_CLAMP_FUNC(s8)	/* clamp_s8  */ ++__MAKE_CLAMP_FUNC(u8)	/* clamp_u8  */ ++__MAKE_CLAMP_FUNC(s16)	/* clamp_s16 */ ++__MAKE_CLAMP_FUNC(u16)	/* clamp_u16 */ ++__MAKE_CLAMP_FUNC(s32)	/* clamp_s32 */ ++__MAKE_CLAMP_FUNC(u32)	/* clamp_u32 */ ++__MAKE_CLAMP_FUNC(s64)	/* clamp_s64 */ ++__MAKE_CLAMP_FUNC(u64)	/* clamp_u64 */ +  +-	return val; +-} ++#undef __MAKE_CLAMP_FUNC +  + #endif /* COMMONLIB_CLAMP_H */ +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch b/config/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch new file mode 100644 index 00000000..35d5c89e --- /dev/null +++ b/config/coreboot/haswell/patches/0002-nb-intel-haswell-Introduce-option-to-not-use-MRC.bin.patch @@ -0,0 +1,143 @@ +From c07391821c32cafea950574b85468f5b3284b6df Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Fri, 6 May 2022 21:12:14 +0200 +Subject: [PATCH 02/26] nb/intel/haswell: Introduce option to not use MRC.bin + +Introduce the `USE_NATIVE_RAMINIT` Kconfig option, which should allow +booting coreboot on Haswell mainboards without the need of the closed +source MRC.bin. For now, this option does not work at all; the needed +magic will be implemented in subsequent commits. Add a config file to +make sure the newly-introduced option gets build-tested. 
+ +Change-Id: I46c77586f9b5771624082e07c60c205e578edd8e +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + configs/config.asrock_b85m_pro4.native_raminit    |  5 +++++ + src/northbridge/intel/haswell/Kconfig             | 13 +++++++++++++ + src/northbridge/intel/haswell/Makefile.inc        |  7 ++++++- + .../intel/haswell/native_raminit/Makefile.inc     |  3 +++ + .../intel/haswell/native_raminit/raminit_native.c | 15 +++++++++++++++ + 5 files changed, 42 insertions(+), 1 deletion(-) + create mode 100644 configs/config.asrock_b85m_pro4.native_raminit + create mode 100644 src/northbridge/intel/haswell/native_raminit/Makefile.inc + create mode 100644 src/northbridge/intel/haswell/native_raminit/raminit_native.c + +diff --git a/configs/config.asrock_b85m_pro4.native_raminit b/configs/config.asrock_b85m_pro4.native_raminit +new file mode 100644 +index 0000000000..2de538926f +--- /dev/null ++++ b/configs/config.asrock_b85m_pro4.native_raminit +@@ -0,0 +1,5 @@ ++# Configuration used to build-test native raminit ++CONFIG_VENDOR_ASROCK=y ++CONFIG_BOARD_ASROCK_B85M_PRO4=y ++CONFIG_USE_NATIVE_RAMINIT=y ++CONFIG_DEBUG_RAM_SETUP=y +diff --git a/src/northbridge/intel/haswell/Kconfig b/src/northbridge/intel/haswell/Kconfig +index 50acb09a91..b659bf6d98 100644 +--- a/src/northbridge/intel/haswell/Kconfig ++++ b/src/northbridge/intel/haswell/Kconfig +@@ -9,6 +9,14 @@ config NORTHBRIDGE_INTEL_HASWELL +  + if NORTHBRIDGE_INTEL_HASWELL +  ++config USE_NATIVE_RAMINIT ++	bool "[NOT WORKING] Use native raminit" ++	default n ++	select HAVE_DEBUG_RAM_SETUP ++	help ++	  Select if you want to use coreboot implementation of raminit rather than ++	  MRC.bin. Currently incomplete and does not boot. 
++ + config HASWELL_VBOOT_IN_BOOTBLOCK + 	depends on VBOOT + 	bool "Start verstage in bootblock" +@@ -45,6 +53,7 @@ config DCACHE_RAM_BASE +  + config DCACHE_RAM_SIZE + 	hex ++	default 0x40000 if USE_NATIVE_RAMINIT + 	default 0x10000 + 	help + 	  The size of the cache-as-ram region required during bootblock +@@ -53,12 +62,14 @@ config DCACHE_RAM_SIZE +  + config DCACHE_RAM_MRC_VAR_SIZE + 	hex ++	default 0x0 if USE_NATIVE_RAMINIT + 	default 0x30000 + 	help + 	  The amount of cache-as-ram region required by the reference code. +  + config DCACHE_BSP_STACK_SIZE + 	hex ++	default 0x20000 if USE_NATIVE_RAMINIT + 	default 0x2000 + 	help + 	  The amount of anticipated stack usage in CAR by bootblock and +@@ -66,6 +77,7 @@ config DCACHE_BSP_STACK_SIZE +  + config HAVE_MRC + 	bool "Add a System Agent binary" ++	depends on !USE_NATIVE_RAMINIT + 	help + 	  Select this option to add a System Agent binary to + 	  the resulting coreboot image. +@@ -82,6 +94,7 @@ config MRC_FILE +  + config HASWELL_HIDE_PEG_FROM_MRC + 	bool "Hide PEG devices from MRC to work around hardcoded MRC behavior" ++	depends on !USE_NATIVE_RAMINIT + 	default y + 	help + 	  If set, hides all PEG devices from MRC. 
This allows the iGPU +diff --git a/src/northbridge/intel/haswell/Makefile.inc b/src/northbridge/intel/haswell/Makefile.inc +index 2d1532be05..329f1f7ffe 100644 +--- a/src/northbridge/intel/haswell/Makefile.inc ++++ b/src/northbridge/intel/haswell/Makefile.inc +@@ -19,6 +19,11 @@ romstage-y += report_platform.c +  + postcar-y += memmap.c +  +-subdirs-y += haswell_mrc ++ifeq ($(CONFIG_USE_NATIVE_RAMINIT),y) ++subdirs-y  += native_raminit ++ ++else ++subdirs-y  += haswell_mrc ++endif +  + endif +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +new file mode 100644 +index 0000000000..8cfb4fb33e +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -0,0 +1,3 @@ ++## SPDX-License-Identifier: GPL-2.0-or-later ++ ++romstage-y += raminit_native.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +new file mode 100644 +index 0000000000..1aafdf8659 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +@@ -0,0 +1,15 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <console/console.h> ++#include <northbridge/intel/haswell/raminit.h> ++ ++void perform_raminit(const int s3resume) ++{ ++	/* ++	 * See, this function's name is a lie. There are more things to ++	 * do that memory initialisation, but they are relatively easy. 
++	 */ ++ ++	/** TODO: Implement the required magic **/ ++	die("NATIVE RAMINIT: More Magic (tm) required.\n"); ++} +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch b/config/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch new file mode 100644 index 00000000..4e70407c --- /dev/null +++ b/config/coreboot/haswell/patches/0003-haswell-lynxpoint-Add-native-DMI-init.patch @@ -0,0 +1,615 @@ +From 6ec71c6df97eded010e96c4ea2bd37cc6a13849d Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Fri, 6 May 2022 21:56:48 +0200 +Subject: [PATCH 03/26] haswell/lynxpoint: Add native DMI init + +Implement native DMI init for Haswell and Lynx Point. This is only +needed on non-ULT platforms, and only when MRC.bin is not used. + +TEST=Verify DMI initialises correctly on Asrock B85M Pro4. + +Change-Id: I5fb1a2adc4ffbf0ebbf0d2d3a444055c53765faa +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + src/northbridge/intel/haswell/Makefile.inc    |   1 + + src/northbridge/intel/haswell/early_dmi.c     |  96 ++++++++++++ + src/northbridge/intel/haswell/early_pcie.c    | 121 ++++++++++++++ + src/northbridge/intel/haswell/haswell.h       |   3 + + .../haswell/native_raminit/raminit_native.c   |  15 ++ + src/northbridge/intel/haswell/vcu_mailbox.c   | 147 ++++++++++++++++++ + src/northbridge/intel/haswell/vcu_mailbox.h   |  16 ++ + src/southbridge/intel/lynxpoint/Makefile.inc  |   2 + + .../intel/lynxpoint/early_pch_native.c        |  52 +++++++ + src/southbridge/intel/lynxpoint/pch.h         |  20 ++- + 10 files changed, 472 insertions(+), 1 deletion(-) + create mode 100644 src/northbridge/intel/haswell/early_dmi.c + create mode 100644 src/northbridge/intel/haswell/early_pcie.c + create mode 100644 src/northbridge/intel/haswell/vcu_mailbox.c + create mode 100644 src/northbridge/intel/haswell/vcu_mailbox.h + create mode 100644 src/southbridge/intel/lynxpoint/early_pch_native.c + +diff --git 
a/src/northbridge/intel/haswell/Makefile.inc b/src/northbridge/intel/haswell/Makefile.inc +index 329f1f7ffe..df0b097296 100644 +--- a/src/northbridge/intel/haswell/Makefile.inc ++++ b/src/northbridge/intel/haswell/Makefile.inc +@@ -20,6 +20,7 @@ romstage-y += report_platform.c + postcar-y += memmap.c +  + ifeq ($(CONFIG_USE_NATIVE_RAMINIT),y) ++romstage-y += early_dmi.c early_pcie.c vcu_mailbox.c + subdirs-y  += native_raminit +  + else +diff --git a/src/northbridge/intel/haswell/early_dmi.c b/src/northbridge/intel/haswell/early_dmi.c +new file mode 100644 +index 0000000000..9941242fd5 +--- /dev/null ++++ b/src/northbridge/intel/haswell/early_dmi.c +@@ -0,0 +1,96 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <console/console.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <southbridge/intel/lynxpoint/pch.h> ++#include <types.h> ++ ++static void dmi_print_link_status(int loglevel) ++{ ++	const uint16_t dmilsts = dmibar_read16(DMILSTS); ++	printk(loglevel, "DMI: Running at Gen%u x%u\n", dmilsts & 0xf, dmilsts >> 4 & 0x1f); ++} ++ ++#define RETRAIN	(1 << 5) ++ ++#define LTRN	(1 << 11) ++ ++static void dmi_setup_physical_layer(void) ++{ ++	/* Program DMI AFE settings, which are needed for DMI to work */ ++	peg_dmi_recipe(false, 0); ++ ++	/* Additional DMI programming steps */ ++	dmibar_setbits32(0x258, 1 << 29); ++	dmibar_clrsetbits32(0x208, 0x7ff, 0x6b5); ++	dmibar_clrsetbits32(0x22c, 0xffff, 0x2020); ++ ++	/* Write SA reference code version */ ++	dmibar_write32(0x71c, 0x0000000f); ++	dmibar_write32(0x720, 0x01060200); ++ ++	/* We also have to bring up the PCH side of the DMI link */ ++	pch_dmi_setup_physical_layer(); ++ ++	/* Write-once settings */ ++	dmibar_clrsetbits32(DMILCAP, 0x3f00f, 2 << 0); ++ ++	printk(BIOS_DEBUG, "Retraining DMI at Gen2 speeds...\n"); ++	dmi_print_link_status(BIOS_DEBUG); ++ ++	/* Retrain link */ ++	dmibar_setbits16(DMILCTL, RETRAIN); ++	do {} while (dmibar_read16(DMILSTS) & LTRN); ++	
dmi_print_link_status(BIOS_DEBUG); ++ ++	/* Retrain link again for DMI Gen2 speeds */ ++	dmibar_setbits16(DMILCTL, RETRAIN); ++	do {} while (dmibar_read16(DMILSTS) & LTRN); ++	dmi_print_link_status(BIOS_INFO); ++} ++ ++#define VC_ACTIVE	(1U << 31) ++ ++#define VCNEGPND	(1 << 1) ++ ++#define DMI_VC_CFG(vcid, tcmap)	(VC_ACTIVE | ((vcid) << 24) | (tcmap)) ++ ++static void dmi_tc_vc_mapping(void) ++{ ++	printk(BIOS_DEBUG, "Programming SA  DMI VC/TC mappings...\n"); ++ ++	if (CONFIG(INTEL_LYNXPOINT_LP)) ++		dmibar_setbits8(0xa78, 1 << 1); ++ ++	/* Each TC is mapped to one and only one VC */ ++	const u32 vc0 = DMI_VC_CFG(0, (1 << 6) | (1 << 5) | (1 << 4) | (1 << 3) | (1 << 0)); ++	const u32 vc1 = DMI_VC_CFG(1, (1 << 1)); ++	const u32 vcp = DMI_VC_CFG(2, (1 << 2)); ++	const u32 vcm = DMI_VC_CFG(7, (1 << 7)); ++	dmibar_write32(DMIVC0RCTL, vc0); ++	dmibar_write32(DMIVC1RCTL, vc1); ++	dmibar_write32(DMIVCPRCTL, vcp); ++	dmibar_write32(DMIVCMRCTL, vcm); ++ ++	/* Set Extended VC Count (EVCC) to 1 if VC1 is active */ ++	dmibar_clrsetbits8(DMIPVCCAP1, 7, !!(vc1 & VC_ACTIVE)); ++ ++	/* ++	 * We also have to program the PCH side of the DMI link. Since both ends ++	 * must use the same Virtual Channel settings, we pass them as arguments. ++	 */ ++	pch_dmi_tc_vc_mapping(vc0, vc1, vcp, vcm); ++ ++	printk(BIOS_DEBUG, "Waiting for SA  DMI VC negotiation... 
"); ++	do {} while (dmibar_read16(DMIVC0RSTS) & VCNEGPND); ++	do {} while (dmibar_read16(DMIVC1RSTS) & VCNEGPND); ++	do {} while (dmibar_read16(DMIVCPRSTS) & VCNEGPND); ++	do {} while (dmibar_read16(DMIVCMRSTS) & VCNEGPND); ++	printk(BIOS_DEBUG, "done!\n"); ++} ++ ++void dmi_early_init(void) ++{ ++	dmi_setup_physical_layer(); ++	dmi_tc_vc_mapping(); ++} +diff --git a/src/northbridge/intel/haswell/early_pcie.c b/src/northbridge/intel/haswell/early_pcie.c +new file mode 100644 +index 0000000000..d3940e3fac +--- /dev/null ++++ b/src/northbridge/intel/haswell/early_pcie.c +@@ -0,0 +1,121 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <console/console.h> ++#include <device/pci_def.h> ++#include <device/pci_mmio_cfg.h> ++#include <device/pci_ops.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <northbridge/intel/haswell/vcu_mailbox.h> ++#include <types.h> ++ ++#define PEG_DEV(func)		PCI_DEV(0, 1, func) ++ ++#define MAX_PEG_FUNC		3 ++ ++static void peg_dmi_unset_and_set_mask_pcicfg( ++	volatile union pci_bank *const bank, ++	const uint32_t offset, ++	const uint32_t unset_mask, ++	const uint32_t set_mask, ++	const uint32_t shift, ++	const bool valid) ++{ ++	if (!valid) ++		return; ++ ++	volatile uint32_t *const addr = &bank->reg32[offset / sizeof(uint32_t)]; ++	clrsetbits32(addr, unset_mask << shift, set_mask << shift); ++} ++ ++static void peg_dmi_unset_and_set_mask_common( ++	const bool is_peg, ++	const uint32_t offset, ++	const uint32_t unset, ++	const uint32_t set, ++	const uint32_t shift, ++	const bool valid) ++{ ++	const uint32_t unset_mask = unset << shift; ++	const uint32_t   set_mask =   set << shift; ++	if (is_peg) { ++		for (uint8_t i = 0; i < MAX_PEG_FUNC; i++) ++			pci_update_config32(PEG_DEV(i), offset, ~unset_mask, set_mask); ++	} else { ++		dmibar_clrsetbits32(offset, unset_mask, set_mask); ++	} ++} ++ ++static void peg_dmi_unset_and_set_mask_vcu_mmio( ++	const uint32_t addr, ++	const uint32_t unset_mask, ++	const uint32_t 
set_mask, ++	const uint32_t shift, ++	const bool valid) ++{ ++	if (!valid) ++		return; ++ ++	vcu_update_mmio(addr, ~(unset_mask << shift), set_mask << shift); ++} ++ ++#define BUNDLE_STEP	0x20 ++ ++static void *const dmibar = (void *)(uintptr_t)CONFIG_FIXED_DMIBAR_MMIO_BASE; ++ ++void peg_dmi_recipe(const bool is_peg, const pci_devfn_t dev) ++{ ++	const bool always = true; ++	const bool is_dmi = !is_peg; ++ ++	/* Treat DMIBAR and PEG devices the same way */ ++	volatile union pci_bank *const bank = is_peg ? pci_map_bus(dev) : dmibar; ++ ++	const size_t bundles = (is_peg ? 8 : 2) * BUNDLE_STEP; ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) { ++		/* These are actually per-lane */ ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0xa00 + i, 0x1f, 0x0c,  0, always); ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0xa10 + i, 0x1f, 0x0c,  0, always); ++	} ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x904 + i, 0x1f, 0x02,  0, is_peg); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x904 + i, 0x1f, 0x03,  5, is_peg); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x90c + i, 0x3f, 0x09,  5, always); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x90c + i, 0x0f, 0x05, 21, is_peg); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x910 + i, 0x0f, 0x08,  6, is_peg); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x910 + i, 0x0f, 0x00, 10, always); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x910 + i, 0x07, 0x00, 18, always); ++ ++	peg_dmi_unset_and_set_mask_vcu_mmio(0x0c008001, 0x1f, 0x03, 25, is_peg); ++	peg_dmi_unset_and_set_mask_vcu_mmio(0x0c0c8001, 0x3f, 0x00, 23, is_dmi); ++ ++	
peg_dmi_unset_and_set_mask_pcicfg(bank, 0xc28, 0x1f, 0x13, 18, always); ++ ++	peg_dmi_unset_and_set_mask_common(is_peg, 0xc38, 0x01, 0x00,  6, always); ++	peg_dmi_unset_and_set_mask_common(is_peg, 0x260, 0x03, 0x02,  0, always); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x900 + i, 0x03, 0x00, 26, always); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x904 + i, 0x03, 0x03, 10, always); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x90c + i, 0x1f, 0x07, 25, is_peg); ++ ++	for (size_t i = 0; i < bundles; i += BUNDLE_STEP) ++		peg_dmi_unset_and_set_mask_pcicfg(bank, 0x91c + i, 0x07, 0x05, 27, is_peg); ++} +diff --git a/src/northbridge/intel/haswell/haswell.h b/src/northbridge/intel/haswell/haswell.h +index 1b29f6baf0..30b4abd0a7 100644 +--- a/src/northbridge/intel/haswell/haswell.h ++++ b/src/northbridge/intel/haswell/haswell.h +@@ -34,6 +34,9 @@ void haswell_early_initialization(void); + void haswell_late_initialization(void); + void haswell_unhide_peg(void); +  ++void dmi_early_init(void); ++void peg_dmi_recipe(const bool is_peg, const pci_devfn_t dev); ++ + void report_platform_info(void); +  + struct acpi_rsdp; +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +index 1aafdf8659..0938e026e3 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +@@ -1,7 +1,19 @@ + /* SPDX-License-Identifier: GPL-2.0-or-later */ +  + #include <console/console.h> ++#include <northbridge/intel/haswell/haswell.h> + #include <northbridge/intel/haswell/raminit.h> ++#include <types.h> ++ ++static bool early_init_native(int s3resume) ++{ ++	printk(BIOS_DEBUG, "Starting native platform initialisation\n"); ++ ++	if (!CONFIG(INTEL_LYNXPOINT_LP)) ++		
dmi_early_init(); ++ ++	return false; ++} +  + void perform_raminit(const int s3resume) + { +@@ -9,6 +21,9 @@ void perform_raminit(const int s3resume) + 	 * See, this function's name is a lie. There are more things to + 	 * do that memory initialisation, but they are relatively easy. + 	 */ ++	const bool cpu_replaced = early_init_native(s3resume); ++ ++	(void)cpu_replaced; +  + 	/** TODO: Implement the required magic **/ + 	die("NATIVE RAMINIT: More Magic (tm) required.\n"); +diff --git a/src/northbridge/intel/haswell/vcu_mailbox.c b/src/northbridge/intel/haswell/vcu_mailbox.c +new file mode 100644 +index 0000000000..aead144023 +--- /dev/null ++++ b/src/northbridge/intel/haswell/vcu_mailbox.c +@@ -0,0 +1,147 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <assert.h> ++#include <console/console.h> ++#include <delay.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <northbridge/intel/haswell/vcu_mailbox.h> ++#include <stdint.h> ++ ++/* ++ * This is a library for the VCU (Validation Control Unit) mailbox. This ++ * mailbox is primarily used to adjust some magic PCIe tuning parameters. ++ * ++ * There are two revisions of the VCU mailbox. Rev1 is specific to Haswell ++ * stepping A0, and all other steppings use Rev2. Haswell stepping A0 CPUs ++ * are early Engineering Samples with undocumented errata, and most likely ++ * need special microcode updates to boot. Thus, the code does not support ++ * VCU mailbox Rev1, because no one should need it anymore. 
++ */ ++ ++#define VCU_MAILBOX_INTERFACE	0x6c00 ++#define VCU_MAILBOX_DATA	0x6c04 ++ ++#define VCU_RUN_BUSY		(1 << 31) ++ ++enum vcu_opcode { ++	VCU_OPCODE_READ_VCU_API_VER_ID	= 0x01, ++	VCU_OPCODE_OPEN_SEQ		= 0x02, ++	VCU_OPCODE_CLOSE_SEQ		= 0x03, ++	VCU_OPCODE_READ_DATA		= 0x07, ++	VCU_OPCODE_WRITE_DATA		= 0x08, ++	VCU_OPCODE_READ_CSR		= 0x13, ++	VCU_OPCODE_WRITE_CSR		= 0x14, ++	VCU_OPCODE_READ_MMIO		= 0x15, ++	VCU_OPCODE_WRITE_MMIO		= 0x16, ++}; ++ ++enum vcu_sequence { ++	SEQ_ID_READ_CSR		= 0x1, ++	SEQ_ID_WRITE_CSR	= 0x2, ++	SEQ_ID_READ_MMIO	= 0x3, ++	SEQ_ID_WRITE_MMIO	= 0x4, ++}; ++ ++#define VCU_RESPONSE_MASK		0xffff ++#define VCU_RESPONSE_SUCCESS		0x40 ++#define VCU_RESPONSE_BUSY		0x80 ++#define VCU_RESPONSE_THREAD_UNAVAILABLE	0x82 ++#define VCU_RESPONSE_ILLEGAL		0x90 ++ ++/* FIXME: Use timer API */ ++static void send_vcu_command(const enum vcu_opcode opcode, const uint32_t data) ++{ ++	for (unsigned int i = 0; i < 10; i++) { ++		mchbar_write32(VCU_MAILBOX_DATA, data); ++		mchbar_write32(VCU_MAILBOX_INTERFACE, opcode | VCU_RUN_BUSY); ++		uint32_t vcu_interface; ++		for (unsigned int j = 0; j < 100; j++) { ++			vcu_interface = mchbar_read32(VCU_MAILBOX_INTERFACE); ++			if (!(vcu_interface & VCU_RUN_BUSY)) ++				break; ++ ++			udelay(10); ++		} ++		if (vcu_interface & VCU_RUN_BUSY) ++			continue; ++ ++		if ((vcu_interface & VCU_RESPONSE_MASK) == VCU_RESPONSE_SUCCESS) ++			return; ++	} ++	printk(BIOS_ERR, "VCU: Failed to send command\n"); ++} ++ ++static enum vcu_opcode get_register_opcode(enum vcu_sequence seq) ++{ ++	switch (seq) { ++	case SEQ_ID_READ_CSR: ++		return VCU_OPCODE_READ_CSR; ++	case SEQ_ID_WRITE_CSR: ++		return VCU_OPCODE_WRITE_CSR; ++	case SEQ_ID_READ_MMIO: ++		return VCU_OPCODE_READ_MMIO; ++	case SEQ_ID_WRITE_MMIO: ++		return VCU_OPCODE_WRITE_MMIO; ++	default: ++		return dead_code_t(enum vcu_opcode); ++	} ++} ++ ++static enum vcu_opcode get_data_opcode(enum vcu_sequence seq) ++{ ++	switch (seq) { ++	case SEQ_ID_READ_CSR: ++	case 
SEQ_ID_READ_MMIO: ++		return VCU_OPCODE_READ_DATA; ++	case SEQ_ID_WRITE_CSR: ++	case SEQ_ID_WRITE_MMIO: ++		return VCU_OPCODE_WRITE_DATA; ++	default: ++		return dead_code_t(enum vcu_opcode); ++	} ++} ++ ++static uint32_t send_vcu_sequence(uint32_t addr, enum vcu_sequence seq, uint32_t wr_data) ++{ ++	send_vcu_command(VCU_OPCODE_OPEN_SEQ, seq); ++ ++	send_vcu_command(get_register_opcode(seq), addr); ++ ++	send_vcu_command(get_data_opcode(seq), wr_data); ++ ++	const uint32_t rd_data = mchbar_read32(VCU_MAILBOX_DATA); ++ ++	send_vcu_command(VCU_OPCODE_CLOSE_SEQ, seq); ++ ++	return rd_data; ++} ++ ++uint32_t vcu_read_csr(uint32_t addr) ++{ ++	return send_vcu_sequence(addr, SEQ_ID_READ_CSR, 0); ++} ++ ++void vcu_write_csr(uint32_t addr, uint32_t data) ++{ ++	send_vcu_sequence(addr, SEQ_ID_WRITE_CSR, data); ++} ++ ++void vcu_update_csr(uint32_t addr, uint32_t andvalue, uint32_t orvalue) ++{ ++	vcu_write_csr(addr, (vcu_read_csr(addr) & andvalue) | orvalue); ++} ++ ++uint32_t vcu_read_mmio(uint32_t addr) ++{ ++	return send_vcu_sequence(addr, SEQ_ID_READ_MMIO, 0); ++} ++ ++void vcu_write_mmio(uint32_t addr, uint32_t data) ++{ ++	send_vcu_sequence(addr, SEQ_ID_WRITE_MMIO, data); ++} ++ ++void vcu_update_mmio(uint32_t addr, uint32_t andvalue, uint32_t orvalue) ++{ ++	vcu_write_mmio(addr, (vcu_read_mmio(addr) & andvalue) | orvalue); ++} +diff --git a/src/northbridge/intel/haswell/vcu_mailbox.h b/src/northbridge/intel/haswell/vcu_mailbox.h +new file mode 100644 +index 0000000000..ba0a62e486 +--- /dev/null ++++ b/src/northbridge/intel/haswell/vcu_mailbox.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#ifndef HASWELL_VCU_MAILBOX_H ++#define HASWELL_VCU_MAILBOX_H ++ ++#include <stdint.h> ++ ++uint32_t vcu_read_csr(uint32_t addr); ++void vcu_write_csr(uint32_t addr, uint32_t data); ++void vcu_update_csr(uint32_t addr, uint32_t andvalue, uint32_t orvalue); ++ ++uint32_t vcu_read_mmio(uint32_t addr); ++void vcu_write_mmio(uint32_t addr, uint32_t data); 
++void vcu_update_mmio(uint32_t addr, uint32_t andvalue, uint32_t orvalue); ++ ++#endif /* HASWELL_VCU_MAILBOX_H */ +diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc +index 02022d348d..b8503ac8bc 100644 +--- a/src/southbridge/intel/lynxpoint/Makefile.inc ++++ b/src/southbridge/intel/lynxpoint/Makefile.inc +@@ -37,6 +37,8 @@ bootblock-y += early_pch.c + romstage-y += early_usb.c early_me.c me_status.c early_pch.c + romstage-y += pmutil.c +  ++romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c ++ + ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y) + romstage-y += lp_gpio.c + ramstage-y += lp_gpio.c +diff --git a/src/southbridge/intel/lynxpoint/early_pch_native.c b/src/southbridge/intel/lynxpoint/early_pch_native.c +new file mode 100644 +index 0000000000..c28ddfcf5d +--- /dev/null ++++ b/src/southbridge/intel/lynxpoint/early_pch_native.c +@@ -0,0 +1,52 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <console/console.h> ++#include <device/pci_ops.h> ++#include <southbridge/intel/lynxpoint/pch.h> ++#include <types.h> ++ ++void pch_dmi_setup_physical_layer(void) ++{ ++	/* FIXME: We need to make sure the SA supports Gen2 as well */ ++	if ((RCBA32(0x21a4) & 0x0f) == 0x02) { ++		/* Set Gen 2 Common Clock N_FTS */ ++		RCBA32_AND_OR(0x2340, ~0x00ff0000, 0x3a << 16); ++ ++		/* Set Target Link Speed to DMI Gen2 */ ++		RCBA8_AND_OR(DLCTL2, ~0x07, 0x02); ++	} ++} ++ ++#define VC_ACTIVE	(1U << 31) ++ ++#define VCNEGPND	(1 << 1) ++ ++void pch_dmi_tc_vc_mapping(const u32 vc0, const u32 vc1, const u32 vcp, const u32 vcm) ++{ ++	printk(BIOS_DEBUG, "Programming PCH DMI VC/TC mappings...\n"); ++ ++	RCBA32_AND_OR(CIR0050, ~(0xf << 20), 2 << 20); ++	if (vcp & VC_ACTIVE) ++		RCBA32_OR(CIR0050, 1 << 19 | 1 << 17); ++ ++	RCBA32(CIR0050);	/* Posted Write */ ++ ++	/* Use the same virtual channel mapping on both ends of the DMI link */ ++	RCBA32(V0CTL) = vc0; ++	RCBA32(V1CTL) = vc1; ++	RCBA32(V1CTL);		/* Posted Write */ ++	
RCBA32(VPCTL) = vcp; ++	RCBA32(VPCTL);		/* Posted Write */ ++	RCBA32(VMCTL) = vcm; ++ ++	/* Lock the registers */ ++	RCBA32_OR(CIR0050, 1U << 31); ++	RCBA32(CIR0050);	/* Posted Write */ ++ ++	printk(BIOS_DEBUG, "Waiting for PCH DMI VC negotiation... "); ++	do {} while (RCBA16(V0STS) & VCNEGPND); ++	do {} while (RCBA16(V1STS) & VCNEGPND); ++	do {} while (RCBA16(VPSTS) & VCNEGPND); ++	do {} while (RCBA16(VMSTS) & VCNEGPND); ++	printk(BIOS_DEBUG, "done!\n"); ++} +diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h +index 7d9fc6d6af..b5e0c2a830 100644 +--- a/src/southbridge/intel/lynxpoint/pch.h ++++ b/src/southbridge/intel/lynxpoint/pch.h +@@ -113,6 +113,9 @@ enum pch_platform_type { + 	PCH_TYPE_ULT	 = 5, + }; +  ++void pch_dmi_setup_physical_layer(void); ++void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm); ++ + void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ); + void usb_ehci_disable(pci_devfn_t dev); + void usb_xhci_sleep_prepare(pci_devfn_t dev, u8 slp_typ); +@@ -406,9 +409,10 @@ void mainboard_config_rcba(void); +  + /* Southbridge IO BARs */ +  ++#define PMBASE			0x40 + #define GPIOBASE		0x48 +  +-#define PMBASE		0x40 ++#define CIR0050		0x0050	/* 32bit */ +  + #define RPC		0x0400	/* 32bit */ + #define RPFN		0x0404	/* 32bit */ +@@ -431,6 +435,20 @@ void mainboard_config_rcba(void); + #define IOTR2		0x1e90	/* 64bit */ + #define IOTR3		0x1e98	/* 64bit */ +  ++#define V0CTL		0x2014	/* 32bit */ ++#define V0STS		0x201a	/* 16bit */ ++ ++#define V1CTL		0x2020	/* 32bit */ ++#define V1STS		0x2026	/* 16bit */ ++ ++#define VPCTL		0x2030	/* 32bit */ ++#define VPSTS		0x2038	/* 16bit */ ++ ++#define VMCTL		0x2040	/* 32bit */ ++#define VMSTS		0x2048	/* 16bit */ ++ ++#define DLCTL2		0x21b0 ++ + #define TCTL		0x3000	/*  8bit */ +  + #define NOINT		0 +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch 
b/config/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch new file mode 100644 index 00000000..28dbc02a --- /dev/null +++ b/config/coreboot/haswell/patches/0004-haswell-lynxpoint-Add-native-early-ME-init.patch @@ -0,0 +1,148 @@ +From 98142e01fc8ebb3b762974e9e4de75e7f5c073b4 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Fri, 6 May 2022 22:18:21 +0200 +Subject: [PATCH 04/26] haswell/lynxpoint: Add native early ME init + +Implement native early ME init for Lynx Point. This is only needed when +MRC.bin is not used. + +Change-Id: If416e2078f139f26b4742c564b70e018725bf003 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../haswell/native_raminit/raminit_native.c   | 17 ++++++++++- + src/southbridge/intel/lynxpoint/early_me.c    | 30 ++++++++++++++++++- + src/southbridge/intel/lynxpoint/me.h          |  7 +++-- + 3 files changed, 50 insertions(+), 4 deletions(-) + +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +index 0938e026e3..6a002548c1 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +@@ -1,18 +1,24 @@ + /* SPDX-License-Identifier: GPL-2.0-or-later */ +  + #include <console/console.h> ++#include <delay.h> + #include <northbridge/intel/haswell/haswell.h> + #include <northbridge/intel/haswell/raminit.h> ++#include <southbridge/intel/lynxpoint/me.h> + #include <types.h> +  + static bool early_init_native(int s3resume) + { + 	printk(BIOS_DEBUG, "Starting native platform initialisation\n"); +  ++	intel_early_me_init(); ++	/** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/ ++	const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check(); ++ + 	if (!CONFIG(INTEL_LYNXPOINT_LP)) + 		dmi_early_init(); +  +-	return false; ++	return cpu_replaced; + } +  + void perform_raminit(const int 
s3resume) +@@ -25,6 +31,15 @@ void perform_raminit(const int s3resume) +  + 	(void)cpu_replaced; +  ++	/** TODO: Move after raminit */ ++	if (intel_early_me_uma_size() > 0) { ++		/** TODO: Update status once raminit is implemented **/ ++		uint8_t me_status = ME_INIT_STATUS_ERROR; ++		intel_early_me_init_done(me_status); ++	} ++ ++	intel_early_me_status(); ++ + 	/** TODO: Implement the required magic **/ + 	die("NATIVE RAMINIT: More Magic (tm) required.\n"); + } +diff --git a/src/southbridge/intel/lynxpoint/early_me.c b/src/southbridge/intel/lynxpoint/early_me.c +index 947c570e16..07013c5539 100644 +--- a/src/southbridge/intel/lynxpoint/early_me.c ++++ b/src/southbridge/intel/lynxpoint/early_me.c +@@ -1,11 +1,12 @@ + /* SPDX-License-Identifier: GPL-2.0-only */ +  + #include <arch/io.h> ++#include <cf9_reset.h> + #include <device/pci_ops.h> + #include <console/console.h> + #include <delay.h> + #include <halt.h> +- ++#include <timer.h> + #include "me.h" + #include "pch.h" +  +@@ -60,6 +61,33 @@ int intel_early_me_init(void) + 	return 0; + } +  ++bool intel_early_me_cpu_replacement_check(void) ++{ ++	printk(BIOS_DEBUG, "ME: Checking whether CPU was replaced... "); ++ ++	struct stopwatch timer; ++	stopwatch_init_msecs_expire(&timer, 50); ++ ++	union me_hfs2 hfs2; ++	do { ++		hfs2.raw = pci_read_config32(PCH_ME_DEV, PCI_ME_HFS2); ++		if (stopwatch_expired(&timer)) { ++			/* Assume CPU was replaced just in case */ ++			printk(BIOS_DEBUG, "timed out, assuming CPU was replaced\n"); ++			return true; ++		} ++		udelay(ME_DELAY); ++	} while (!hfs2.cpu_replaced_valid); ++ ++	if (hfs2.warm_reset_request) { ++		printk(BIOS_DEBUG, "warm reset needed for dynamic fusing\n"); ++		system_reset(); ++	} ++ ++	printk(BIOS_DEBUG, "%sreplaced\n", hfs2.cpu_replaced_sts ? 
"" : "not "); ++	return hfs2.cpu_replaced_sts; ++} ++ + int intel_early_me_uma_size(void) + { + 	union me_uma uma = { .raw = pci_read_config32(PCH_ME_DEV, PCI_ME_UMA) }; +diff --git a/src/southbridge/intel/lynxpoint/me.h b/src/southbridge/intel/lynxpoint/me.h +index fe8b0260c4..6990322651 100644 +--- a/src/southbridge/intel/lynxpoint/me.h ++++ b/src/southbridge/intel/lynxpoint/me.h +@@ -177,14 +177,16 @@ union me_did { + union me_hfs2 { + 	struct __packed { + 		u32 bist_in_progress: 1; +-		u32 reserved1: 2; ++		u32 icc_prog_sts: 2; + 		u32 invoke_mebx: 1; + 		u32 cpu_replaced_sts: 1; + 		u32 mbp_rdy: 1; + 		u32 mfs_failure: 1; + 		u32 warm_reset_request: 1; + 		u32 cpu_replaced_valid: 1; +-		u32 reserved2: 4; ++		u32 reserved: 2; ++		u32 fw_upd_ipu: 1; ++		u32 reserved2: 1; + 		u32 mbp_cleared: 1; + 		u32 reserved3: 2; + 		u32 current_state: 8; +@@ -338,6 +340,7 @@ void intel_me_status(union me_hfs hfs, union me_hfs2 hfs2); +  + void intel_early_me_status(void); + int intel_early_me_init(void); ++bool intel_early_me_cpu_replacement_check(void); + int intel_early_me_uma_size(void); + int intel_early_me_init_done(u8 status); +  +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch b/config/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch new file mode 100644 index 00000000..d9c2570b --- /dev/null +++ b/config/coreboot/haswell/patches/0005-sb-intel-lynxpoint-Add-native-USB-init.patch @@ -0,0 +1,783 @@ +From 9bfb8614dbf1d9800ef8251cb3d839bcdbe5577f Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Fri, 6 May 2022 23:17:39 +0200 +Subject: [PATCH 05/26] sb/intel/lynxpoint: Add native USB init + +Implement native USB initialisation for Lynx Point. This is only needed +when MRC.bin is not used. + +TO DO: Figure out how to deal with the FIXME's and TODO's lying around. 
+ +Change-Id: Ie0fbeeca7b1ca1557173772d733fd2fa27703373 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../haswell/native_raminit/raminit_native.c   |   3 + + src/southbridge/intel/lynxpoint/Makefile.inc  |   2 +- + src/southbridge/intel/lynxpoint/early_usb.c   |  11 - + .../intel/lynxpoint/early_usb_native.c        | 584 ++++++++++++++++++ + src/southbridge/intel/lynxpoint/pch.h         |  49 ++ + 5 files changed, 637 insertions(+), 12 deletions(-) + create mode 100644 src/southbridge/intel/lynxpoint/early_usb_native.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +index 6a002548c1..ef61d4ee09 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +@@ -5,6 +5,7 @@ + #include <northbridge/intel/haswell/haswell.h> + #include <northbridge/intel/haswell/raminit.h> + #include <southbridge/intel/lynxpoint/me.h> ++#include <southbridge/intel/lynxpoint/pch.h> + #include <types.h> +  + static bool early_init_native(int s3resume) +@@ -15,6 +16,8 @@ static bool early_init_native(int s3resume) + 	/** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/ + 	const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check(); +  ++	early_usb_init(); ++ + 	if (!CONFIG(INTEL_LYNXPOINT_LP)) + 		dmi_early_init(); +  +diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc +index b8503ac8bc..0e1f2fe4eb 100644 +--- a/src/southbridge/intel/lynxpoint/Makefile.inc ++++ b/src/southbridge/intel/lynxpoint/Makefile.inc +@@ -37,7 +37,7 @@ bootblock-y += early_pch.c + romstage-y += early_usb.c early_me.c me_status.c early_pch.c + romstage-y += pmutil.c +  +-romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c ++romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c +  + ifeq 
/*
 * Early initialisation of one EHCI host controller.
 *
 * dev:      PCI device of the EHCI controller to set up.
 * ehci_bar: temporary MMIO address to program into BAR0 of that device.
 *
 * The BAR and the memory/bus-master command bits are left programmed when
 * this function returns; nothing in here tears them down again.
 */
static void ehci_hcs_init(const pci_devfn_t dev, const uintptr_t ehci_bar)
{
	/* Assign the temporary BAR so the controller's MMIO registers are reachable */
	pci_write_config32(dev, PCI_BASE_ADDRESS_0, ehci_bar);

	/** FIXME: Determine whether Bus Master is required (or clean it up afterwards) **/
	pci_or_config16(dev, PCI_COMMAND, PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY);

	/* Byte pointer to the MMIO window, used for the register offsets below */
	char *const mem_bar = (char *)ehci_bar;

	/**
	 * Shared EHCI/XHCI ports w/a.
	 * This step is required when some of the ports are routed to EHCI
	 * and other ports are routed to XHCI at the same time.
	 *
	 * FIXME: Under which conditions should this be done?
	 */
	pci_and_config16(dev, 0x78, ~0x03);

	/* Skip reset if usbdebug is enabled */
	if (!CONFIG(USBDEBUG_IN_PRE_RAM))
		setbits32(mem_bar + EHCI_USB_CMD, EHCI_USB_CMD_HCRESET);

	/*
	 * 2: Configure number of controllers and ports
	 *
	 * The HCS_PARAMS updates are bracketed by setting and then clearing
	 * ACCESS_CNTL_ENABLE (presumably the register is read-only otherwise --
	 * TODO confirm). Bits 15:12 are zeroed and bits 3:0 are set to 2, plus
	 * one more if USBr is enabled.
	 */
	pci_or_config16(dev, EHCI_ACCESS_CNTL, ACCESS_CNTL_ENABLE);
	clrsetbits32(mem_bar + EHCI_HCS_PARAMS, 0xf << 12, 0);
	clrsetbits32(mem_bar + EHCI_HCS_PARAMS, 0xf <<  0, 2 + is_usbr_enabled());
	pci_and_config16(dev, EHCI_ACCESS_CNTL, ~ACCESS_CNTL_ENABLE);

	/*
	 * Remaining PCI config tweaks at offsets 0x78, 0x7c and 0x8c.
	 * NOTE(review): these offsets have no symbolic names in pch.h; their
	 * meaning should be confirmed against the BWG.
	 */
	pci_or_config16(dev, 0x78, BIT(2));
	pci_or_config16(dev, 0x7c, BIT(14) | BIT(7));
	/* Bits 11:8 of 0x8c are replaced with the value 4 */
	pci_update_config32(dev, 0x8c, ~(0xf << 8), (4 << 8));
	/* Bit 26 of 0x8c is cleared and bit 17 is set */
	pci_update_config32(dev, 0x8c, ~BIT(26), BIT(17));
}
/*
 * Program the xHCI host controller's capability and chipset-specific
 * registers, for both LPT-H and LPT-LP (selected at build time through
 * CONFIG(INTEL_LYNXPOINT_LP)).
 *
 * All MMIO accesses go through xhci_bar, so the xHCI memory space must have
 * been opened beforehand (early_usb_init() calls xhci_open_memory_space()
 * before calling this function).
 *
 * Most offsets (0x8008 .. 0x8188) and the values written to them have no
 * symbolic names in this file; they are reproduced as-is.
 */
static void common_xhci_hc_init(void)
{
	const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP);

	if (!is_lp) {
		/*
		 * max_ports is 15 plus the SuperSpeed port count (0, 2, 4 or 6).
		 *
		 * NOTE(review): the mask only covers bits 31:28 and the value is
		 * shifted by 28, so only the low four bits of max_ports end up
		 * in the register. The xHCI specification defines MaxPorts as
		 * HCSPARAMS1 bits 31:24 -- confirm whether a mask of 0xff << 24
		 * and a shift of 24 were intended.
		 */
		const unsigned int max_ports = 15 + ss_port_count();
		clrsetbits32(xhci_bar + XHCI_HCS_PARAMS_1, 0xf << 28, max_ports << 28);
	}

	/* Bits 31:16 of HCS_PARAMS_3 become 0x200 and bits 7:0 become 0x0a */
	clrsetbits32(xhci_bar + XHCI_HCS_PARAMS_3, 0xffff << 16 | 0xff, 0x200 << 16 | 0x0a);
	/* Clear bit 5 and set bits 10 and 9 of HCC_PARAMS */
	clrsetbits32(xhci_bar + XHCI_HCC_PARAMS, BIT(5), BIT(10) | BIT(9));

	/* LPT-H only: clear bit 19 of 0x8008 */
	if (!is_lp)
		clrsetbits32(xhci_bar + 0x8008, BIT(19), 0);

	/* 0x8058: LPT-H additionally sets bit 20 */
	if (is_lp)
		clrsetbits32(xhci_bar + 0x8058, BIT(8), BIT(16));
	else
		clrsetbits32(xhci_bar + 0x8058, BIT(8), BIT(16) | BIT(20));

	/* Settings common to both PCH variants */
	clrsetbits32(xhci_bar + 0x8060, 0, BIT(25) | BIT(18));
	clrsetbits32(xhci_bar + 0x8090, 0, BIT(14) | BIT(8));
	clrsetbits32(xhci_bar + 0x8094, 0, BIT(23) | BIT(21) | BIT(14));
	clrsetbits32(xhci_bar + 0x80e0, BIT(16), BIT(6));
	clrsetbits32(xhci_bar + 0x80ec, (7 << 12) | (7 << 9), (0 << 12) | (6 << 9));
	clrsetbits32(xhci_bar + 0x80f0, BIT(20), 0);

	/* LPT-LP only: set bit 25 of 0x80fc */
	if (is_lp)
		clrsetbits32(xhci_bar + 0x80fc, 0, BIT(25));

	/* 0x8110: LPT-LP additionally clears bit 8 */
	if (is_lp)
		clrsetbits32(xhci_bar + 0x8110, BIT(8) | BIT(2), BIT(20) | BIT(11));
	else
		clrsetbits32(xhci_bar + 0x8110, BIT(2), BIT(20) | BIT(11));

	/* 0x8140 is written in full, with a different value per variant */
	if (is_lp)
		write32(xhci_bar + 0x8140, 0xff00f03c);
	else
		write32(xhci_bar + 0x8140, 0xff03c132);

	/* 0x8154: bit 21 is cleared on both; bit 13 is set on LPT-LP, cleared on LPT-H */
	if (is_lp)
		clrsetbits32(xhci_bar + 0x8154, BIT(21), BIT(13));
	else
		clrsetbits32(xhci_bar + 0x8154, BIT(21) | BIT(13), 0);

	clrsetbits32(xhci_bar + 0x8154, BIT(3), 0);

	/* LPT-LP only: additional registers at 0x8164 .. 0x8184 */
	if (is_lp) {
		clrsetbits32(xhci_bar + 0x8164, 0, BIT(1) | BIT(0));
		write32(xhci_bar + 0x8174, 0x01400c0a);
		write32(xhci_bar + 0x817c, 0x033200a3);
		write32(xhci_bar + 0x8180, 0x00cb0028);
		write32(xhci_bar + 0x8184, 0x0064001e);
	}

	/*
	 * Note: Register at offset 0x44 is 32-bit, but bit 31 is write-once.
	 * We use these weird partial accesses here to avoid locking bit 31.
	 */
	pci_or_config16(PCH_XHCI_DEV, 0x44, BIT(15) | BIT(14) | BIT(10) | BIT(0));
	pci_or_config8(PCH_XHCI_DEV, 0x44 + 2, 0x0f);

	/*
	 * NOTE(review): the two comments below name minimum steppings, but no
	 * stepping check is performed here; the writes only depend on is_lp.
	 */

	/* LPT-LP >= B0 */
	if (is_lp)
		clrsetbits32(xhci_bar + 0x8188, 0, BIT(26) | BIT(24));

	/* LPT-H >= C0 */
	if (!is_lp)
		clrsetbits32(xhci_bar + 0x8188, 0, BIT(24));
}
++		reg32 = 0; ++		for (unsigned int port = 0; port < ehci_ports; port++) ++			reg32 |= read32(xhci_bar + XHCI_USB2_PORTSC(port)); ++ ++		reg32 &= XHCI_USB2_PORTSC_PR; ++		if (!reg32) { ++			const long elapsed_time = stopwatch_duration_usecs(&timer); ++			printk(BIOS_DEBUG, "%s: took %lu usecs\n", __func__, elapsed_time); ++			return true; ++		} ++		/* Reference code has a 10 ms delay here, but a smaller delay works too */ ++		udelay(100); ++	} while (!stopwatch_expired(&timer)); ++	printk(BIOS_ERR, "%s: timed out\n", __func__); ++	return !reg32; ++} ++ ++/* Returns whether warm reset was successful */ ++static bool warm_reset_usb3_ports(const unsigned int xhci_ports) ++{ ++	for (unsigned int port = 0; port < xhci_ports; port++) { ++		/* Initiate warm reset for all USB3 ports */ ++		clrsetbits32( ++			xhci_bar + XHCI_USB3_PORTSC(port), ++			XHCI_USB3_PORTSC_PED, ++			XHCI_USB3_PORTSC_WPR); ++	} ++	/* Poll for port reset bit to be cleared or time out at 100ms */ ++	struct stopwatch timer; ++	stopwatch_init_msecs_expire(&timer, 100); ++	uint32_t reg32; ++	do { ++		reg32 = 0; ++		for (unsigned int port = 0; port < xhci_ports; port++) ++			reg32 |= read32(xhci_bar + XHCI_USB3_PORTSC(port)); ++ ++		reg32 &= XHCI_USB3_PORTSC_PR; ++		if (!reg32) { ++			const long elapsed_time = stopwatch_duration_usecs(&timer); ++			printk(BIOS_DEBUG, "%s: took %lu usecs\n", __func__, elapsed_time); ++			return true; ++		} ++		/* Reference code has a 10 ms delay here, but a smaller delay works too */ ++		udelay(100); ++	} while (!stopwatch_expired(&timer)); ++	printk(BIOS_ERR, "%s: timed out\n", __func__); ++	return !reg32; ++} ++ ++static void perform_xhci_ehci_switching_flow(const enum usb_port_route usb_route) ++{ ++	const pci_devfn_t dev = PCH_XHCI_DEV; ++ ++	const unsigned int ehci_ports = hs_port_count() + is_usbr_enabled(); ++	const unsigned int xhci_ports = ss_port_count(); ++ ++	const uint32_t ehci_mask = BIT(ehci_ports) - 1; ++	const uint32_t xhci_mask = BIT(xhci_ports) - 1; ++ 
++	/** TODO: Handle USBr port? How, though? **/ ++	pci_update_config32(dev, XHCI_USB2PRM, ~XHCI_USB2PR_HCSEL, ehci_mask); ++	pci_update_config32(dev, XHCI_USB3PRM, ~XHCI_USB3PR_SSEN,  xhci_mask); ++ ++	/* ++	 * Workaround for USB2PR / USB3PR value not surviving warm reset. ++	 * Restore USB Port Routing registers if OS HC Switch driver has been executed. ++	 */ ++	if (should_restore_xhci_smart_auto()) { ++		/** FIXME: Derive values from mainboard code instead? **/ ++		pci_update_config32(dev, XHCI_USB2PR, ~XHCI_USB2PR_HCSEL, ehci_mask); ++		pci_update_config32(dev, XHCI_USB3PR, ~XHCI_USB3PR_SSEN,  xhci_mask); ++	} ++ ++	/* Later stages shouldn't need the value of this bit */ ++	pci_and_config32(PCH_LPC_DEV, PMIR, ~PMIR_XHCI_SMART_AUTO); ++ ++	/** ++	 * FIXME: Things here depend on the chosen routing mode. ++	 *        For now, implement both functions. ++	 */ ++ ++	/* Route to EHCI if xHCI disabled or auto mode */ ++	if (usb_route == ROUTE_TO_EHCI) { ++		if (!reset_usb2_ports(ehci_ports)) ++			printk(BIOS_ERR, "USB2 port reset timed out\n"); ++ ++		pci_and_config32(dev, XHCI_USB2PR, ~XHCI_USB2PR_HCSEL); ++ ++		for (unsigned int port = 0; port < ehci_ports; port++) { ++			clrsetbits32( ++				xhci_bar + XHCI_USB2_PORTSC(port), ++				XHCI_USB2_PORTSC_PED, ++				XHCI_USB2_PORTSC_CHST); ++		} ++ ++		if (!warm_reset_usb3_ports(xhci_ports)) ++			printk(BIOS_ERR, "USB3 warm reset timed out\n"); ++ ++		/* FIXME: BWG says this should be inside the warm reset function */ ++		pci_and_config32(dev, XHCI_USB3PR, ~XHCI_USB3PR_SSEN); ++ ++		for (unsigned int port = 0; port < ehci_ports; port++) { ++			clrsetbits32( ++				xhci_bar + XHCI_USB3_PORTSC(port), ++				XHCI_USB3_PORTSC_PED, ++				XHCI_USB3_PORTSC_CHST); ++		} ++ ++		setbits32(xhci_bar + XHCI_USBCMD, BIT(0)); ++		clrbits32(xhci_bar + XHCI_USBCMD, BIT(0)); ++	} ++ ++	/* Route to xHCI if xHCI enabled */ ++	if (usb_route == ROUTE_TO_XHCI) { ++		if (is_mem_sr()) { ++			if (!warm_reset_usb3_ports(xhci_ports)) ++				
printk(BIOS_ERR, "USB3 warm reset timed out\n"); ++		} ++ ++		const uint32_t xhci_port_mask = pci_read_config32(dev, XHCI_USB3PRM) & 0x3f; ++		pci_update_config32(dev, XHCI_USB3PR, ~XHCI_USB3PR_SSEN, xhci_port_mask); ++ ++		const uint32_t ehci_port_mask = pci_read_config32(dev, XHCI_USB2PRM) & 0x7fff; ++		pci_update_config32(dev, XHCI_USB2PR, ~XHCI_USB2PR_HCSEL, ehci_port_mask); ++	} ++} ++ ++/* Do not shift in this macro, as it can cause undefined behaviour for bad port/oc values */ ++#define PORT_TO_OC_SHIFT(port, oc)	((oc) * 8 + (port)) ++ ++/* Avoid shifting into undefined behaviour */ ++static inline bool shift_ok(const int shift) ++{ ++	return shift >= 0 && shift < 32; ++} ++ ++static void usb_overcurrent_mapping(void) ++{ ++	const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP); ++ ++	uint32_t ehci_1_ocmap = 0; ++	uint32_t ehci_2_ocmap = 0; ++	uint32_t xhci_1_ocmap = 0; ++	uint32_t xhci_2_ocmap = 0; ++ ++	/* ++	 * EHCI ++	 */ ++	for (unsigned int idx = 0; idx < physical_port_count(); idx++) { ++		const struct usb2_port_config *const port = &mainboard_usb2_ports[idx]; ++		printk(BIOS_DEBUG, "USB2 port %u => ", idx); ++		if (!port->enable) { ++			printk(BIOS_DEBUG, "disabled\n"); ++			continue; ++		} ++		const unsigned short oc_pin = port->oc_pin; ++		if (oc_pin == USB_OC_PIN_SKIP) { ++			printk(BIOS_DEBUG, "not mapped to OC pin\n"); ++			continue; ++		} ++		/* Ports 0 .. 7 => OC 0 .. 3 */ ++		if (idx < 8 && oc_pin <= 3) { ++			const int shift = PORT_TO_OC_SHIFT(idx, oc_pin); ++			if (shift_ok(shift)) { ++				printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin); ++				ehci_1_ocmap |= 1 << shift; ++				continue; ++			} ++		} ++		/* Ports 8 .. 13 => OC 4 .. 
7 (LPT-H only) */ ++		if (!is_lp && idx >= 8 && oc_pin >= 4) { ++			const int shift = PORT_TO_OC_SHIFT(idx, oc_pin - 4); ++			if (shift_ok(shift)) { ++				printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin); ++				ehci_2_ocmap |= 1 << shift; ++				continue; ++			} ++		} ++		printk(BIOS_ERR, "Invalid OC pin %u for USB2 port %u\n", oc_pin, idx); ++	} ++	printk(BIOS_DEBUG, "\n"); ++	pci_write_config32(PCH_EHCI1_DEV, EHCI_OCMAP, ehci_1_ocmap); ++	if (!is_lp) ++		pci_write_config32(PCH_EHCI2_DEV, EHCI_OCMAP, ehci_2_ocmap); ++ ++	/* ++	 * xHCI ++	 */ ++	for (unsigned int idx = 0; idx < ss_port_count(); idx++) { ++		const struct usb3_port_config *const port = &mainboard_usb3_ports[idx]; ++		printk(BIOS_DEBUG, "USB3 port %u => ", idx); ++		if (!port->enable) { ++			printk(BIOS_DEBUG, "disabled\n"); ++			continue; ++		} ++		const unsigned short oc_pin = port->oc_pin; ++		if (oc_pin == USB_OC_PIN_SKIP) { ++			printk(BIOS_DEBUG, "not mapped to OC pin\n"); ++			continue; ++		} ++		/* Ports 0 .. 5 => OC 0 .. 3 */ ++		if (oc_pin <= 3) { ++			const int shift = PORT_TO_OC_SHIFT(idx, oc_pin); ++			if (shift_ok(shift)) { ++				printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin); ++				xhci_1_ocmap |= 1 << shift; ++				continue; ++			} ++		} ++		/* Ports 0 .. 5 => OC 4 .. 
7 (LPT-H only) */ ++		if (!is_lp && oc_pin >= 4) { ++			const int shift = PORT_TO_OC_SHIFT(idx, oc_pin - 4); ++			if (shift_ok(shift)) { ++				printk(BIOS_DEBUG, "mapped to OC pin %u\n", oc_pin); ++				xhci_2_ocmap |= 1 << shift; ++				continue; ++			} ++		} ++		printk(BIOS_ERR, "Invalid OC pin %u for USB3 port %u\n", oc_pin, idx); ++	} ++	printk(BIOS_DEBUG, "\n"); ++	pci_write_config32(PCH_XHCI_DEV, XHCI_U2OCM1, ehci_1_ocmap); ++	pci_write_config32(PCH_XHCI_DEV, XHCI_U3OCM1, xhci_1_ocmap); ++	if (!is_lp) { ++		pci_write_config32(PCH_XHCI_DEV, XHCI_U2OCM2, ehci_2_ocmap); ++		pci_write_config32(PCH_XHCI_DEV, XHCI_U3OCM2, xhci_2_ocmap); ++	} ++} ++ ++static uint8_t get_ehci_tune_param_1(const struct usb2_port_config *const port) ++{ ++	const bool is_lp = CONFIG(INTEL_LYNXPOINT_LP); ++ ++	const enum pch_platform_type plat_type = get_pch_platform_type(); ++	const enum usb2_port_location location = port->location; ++	const uint16_t length = port->length; ++	if (!is_lp) { ++		if (plat_type == PCH_TYPE_DESKTOP) { ++			if (location == USB_PORT_BACK_PANEL) ++				return 4; /* Back Panel */ ++			else ++				return 3; /* Front Panel */ ++ ++		} else if (plat_type == PCH_TYPE_MOBILE) { ++			if (location == USB_PORT_INTERNAL) ++				return 5; /* Internal Topology */ ++			else if (location == USB_PORT_DOCK) ++				return 4; /* Dock */ ++			else if (length < 0x70) ++				return 5; /* Back Panel, less than 7" */ ++			else ++				return 6; /* Back Panel, 7" or more */ ++		} ++	} else { ++		if (location == USB_PORT_BACK_PANEL || location == USB_PORT_MINI_PCIE) { ++			if (length < 0x70) ++				return 5; /* Back Panel, less than 7" */ ++			else ++				return 6; /* Back Panel, 7" or more */ ++		} else if (location == USB_PORT_DOCK) { ++			return 4; /* Dock */ ++		} else { ++			return 5; /* Internal Topology */ ++		} ++	} ++	printk(BIOS_ERR, "%s: Unhandled case\n", __func__); ++	return 0; ++} ++ ++static uint8_t get_ehci_tune_param_2(const struct usb2_port_config *const port) ++{ ++	const bool 
is_lp = CONFIG(INTEL_LYNXPOINT_LP); ++ ++	const enum pch_platform_type plat_type = get_pch_platform_type(); ++	const enum usb2_port_location location = port->location; ++	const uint16_t length = port->length; ++	if (!is_lp) { ++		if (plat_type == PCH_TYPE_DESKTOP) { ++			if (location == USB_PORT_BACK_PANEL) { ++				if (length < 0x80) ++					return 2; /* Back Panel, less than 8" */ ++				else if (length < 0x130) ++					return 3; /* Back Panel, 8"-13" */ ++				else ++					return 4; /* Back Panel, 13" or more */ ++			} else { ++				return 2; /* Front Panel */ ++			} ++ ++		} else if (plat_type == PCH_TYPE_MOBILE) { ++			if (location == USB_PORT_INTERNAL) { ++				return 2; /* Internal Topology */ ++			} else if (location == USB_PORT_DOCK) { ++				if (length < 0x50) ++					return 1; /* Dock, less than 5" */ ++				else ++					return 2; /* Dock, 5" or more */ ++			} else { ++				if (length < 0x100) ++					return 2; /* Back Panel, less than 10" */ ++				else ++					return 3; /* Back Panel, 10" or more */ ++			} ++		} ++	} else { ++		if (location == USB_PORT_BACK_PANEL || location == USB_PORT_MINI_PCIE) { ++			if (length < 0x100) ++				return 2; /* Back Panel, less than 10" */ ++			else ++				return 3; /* Back Panel, 10" or more */ ++		} else if (location == USB_PORT_DOCK) { ++			if (length < 0x50) ++				return 1; /* Dock, less than 5" */ ++			else ++				return 2; /* Dock, 5" or more */ ++		} else { ++			return 2; /* Internal Topology */ ++		} ++	} ++	printk(BIOS_ERR, "%s: Unhandled case\n", __func__); ++	return 0; ++} ++ ++static void program_ehci_port_length(void) ++{ ++	for (unsigned int port = 0; port < physical_port_count(); port++) { ++		if (!mainboard_usb2_ports[port].enable) ++			continue; ++		const uint32_t addr = 0xe5004000 + (port + 1) * 0x100; ++		const uint8_t param_1 = get_ehci_tune_param_1(&mainboard_usb2_ports[port]); ++		const uint8_t param_2 = get_ehci_tune_param_2(&mainboard_usb2_ports[port]); ++		pch_iobp_update(addr, ~0x7f00, param_2 << 11 | param_1 << 
8); ++	} ++} ++ ++void early_usb_init(void) ++{ ++	/** TODO: Make this configurable? How do the modes affect usbdebug? **/ ++	const enum usb_port_route usb_route = ROUTE_TO_XHCI; ++	///(pd->boot_mode == 2 && pd->usb_xhci_on_resume) ? ROUTE_TO_XHCI : ROUTE_TO_EHCI; ++ ++	common_ehci_hcs_init(); ++	xhci_open_memory_space(); ++	common_xhci_hc_init(); ++	perform_xhci_ehci_switching_flow(usb_route); ++	usb_overcurrent_mapping(); ++	program_ehci_port_length(); ++	/** FIXME: USB per port control is missing, is it needed? **/ ++	xhci_close_memory_space(); ++	/** TODO: Close EHCI memory space? **/ ++} +diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h +index b5e0c2a830..ad983d86cf 100644 +--- a/src/southbridge/intel/lynxpoint/pch.h ++++ b/src/southbridge/intel/lynxpoint/pch.h +@@ -115,6 +115,7 @@ enum pch_platform_type { +  + void pch_dmi_setup_physical_layer(void); + void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm); ++void early_usb_init(void); +  + void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ); + void usb_ehci_disable(pci_devfn_t dev); +@@ -202,6 +203,8 @@ void mainboard_config_rcba(void); + #define GEN_PMCON_1		0xa0 + #define  SMI_LOCK		(1 << 4) + #define GEN_PMCON_2		0xa2 ++#define  GEN_PMCON_2_DISB	(1 << 7) ++#define  GEN_PMCON_2_MEM_SR	(1 << 5) + #define  SYSTEM_RESET_STS	(1 << 4) + #define  THERMTRIP_STS		(1 << 3) + #define  SYSPWR_FLR		(1 << 1) +@@ -215,6 +218,7 @@ void mainboard_config_rcba(void); + #define PMIR			0xac + #define  PMIR_CF9LOCK		(1 << 31) + #define  PMIR_CF9GR		(1 << 20) ++#define  PMIR_XHCI_SMART_AUTO	(1 << 16) /* c.f. LPT BWG or WPT-LP BIOS spec */ +  + /* GEN_PMCON_3 bits */ + #define RTC_BATTERY_DEAD	(1 << 2) +@@ -282,6 +286,20 @@ void mainboard_config_rcba(void); + #define SATA_DTLE_DATA_SHIFT	24 + #define SATA_DTLE_EDGE_SHIFT	16 +  ++/* ++ * HCD_INDEX == 2 selects 0:1a.0 (PCH_EHCI2), any other index ++ * selects 0:1d.0 (PCH_EHCI1) for usbdebug use. 
++ */ ++#if CONFIG_USBDEBUG_HCD_INDEX != 2 ++#define PCH_EHCI1_TEMP_BAR0 CONFIG_EHCI_BAR ++#define PCH_EHCI2_TEMP_BAR0 (PCH_EHCI1_TEMP_BAR0 + 0x400) ++#else ++#define PCH_EHCI2_TEMP_BAR0 CONFIG_EHCI_BAR ++#define PCH_EHCI1_TEMP_BAR0 (PCH_EHCI2_TEMP_BAR0 + 0x400) ++#endif ++ ++#define PCH_XHCI_TEMP_BAR0	0xe8100000 ++ + /* EHCI PCI Registers */ + #define EHCI_PWR_CTL_STS	0x54 + #define  PWR_CTL_SET_MASK	0x3 +@@ -289,10 +307,15 @@ void mainboard_config_rcba(void); + #define  PWR_CTL_SET_D3		0x3 + #define  PWR_CTL_ENABLE_PME	(1 << 8) + #define  PWR_CTL_STATUS_PME	(1 << 15) ++#define EHCI_OCMAP		0x74 ++#define EHCI_ACCESS_CNTL	0x80 ++#define  ACCESS_CNTL_ENABLE	(1 << 0) +  + /* EHCI Memory Registers */ ++#define EHCI_HCS_PARAMS		0x04 + #define EHCI_USB_CMD		0x20 + #define  EHCI_USB_CMD_RUN	(1 << 0) ++#define  EHCI_USB_CMD_HCRESET	(1 << 1) + #define  EHCI_USB_CMD_PSE	(1 << 4) + #define  EHCI_USB_CMD_ASE	(1 << 5) + #define EHCI_PORTSC(port)	(0x64 + (port) * 4) +@@ -301,6 +324,10 @@ void mainboard_config_rcba(void); +  + /* XHCI PCI Registers */ + #define XHCI_PWR_CTL_STS	0x74 ++#define XHCI_U2OCM1		0xc0 ++#define XHCI_U2OCM2		0xc4 ++#define XHCI_U3OCM1		0xc8 ++#define XHCI_U3OCM2		0xcc + #define XHCI_USB2PR		0xd0 + #define XHCI_USB2PRM		0xd4 + #define  XHCI_USB2PR_HCSEL	0x7fff +@@ -313,6 +340,27 @@ void mainboard_config_rcba(void); + #define XHCI_USB3PDO		0xe8 +  + /* XHCI Memory Registers */ ++#define XHCI_HCS_PARAMS_1	0x04 ++#define XHCI_HCS_PARAMS_2	0x08 ++#define XHCI_HCS_PARAMS_3	0x0c ++#define XHCI_HCC_PARAMS		0x10 ++#define XHCI_USBCMD		0x80 ++#define XHCI_USB2_PORTSC(port)	(0x480 + ((port) * 0x10)) ++#define  XHCI_USB2_PORTSC_WPR	(1 << 31)	/* Warm Port Reset */ ++#define  XHCI_USB2_PORTSC_CEC	(1 << 23)	/* Port Config Error Change */ ++#define  XHCI_USB2_PORTSC_PLC	(1 << 22)	/* Port Link State Change */ ++#define  XHCI_USB2_PORTSC_PRC	(1 << 21)	/* Port Reset Change */ ++#define  XHCI_USB2_PORTSC_OCC	(1 << 20)	/* Over-current Change */ ++#define  
XHCI_USB2_PORTSC_WRC	(1 << 19)	/* Warm Port Reset Change */ ++#define  XHCI_USB2_PORTSC_PEC	(1 << 18)	/* Port Enabled Disabled Change */ ++#define  XHCI_USB2_PORTSC_CSC	(1 << 17)	/* Connect Status Change */ ++#define  XHCI_USB2_PORTSC_CHST	(0x7f << 17) ++#define  XHCI_USB2_PORTSC_LWS	(1 << 16)	/* Port Link State Write Strobe */ ++#define  XHCI_USB2_PORTSC_PP	(1 <<  9) ++#define  XHCI_USB2_PORTSC_PR	(1 <<  4)	/* Port Reset */ ++#define  XHCI_USB2_PORTSC_PED	(1 <<  1)	/* Port Enable/Disabled */ ++#define  XHCI_USB2_PORTSC_CCS	(1 <<  0)	/* Current Connect Status */ ++ + #define XHCI_USB3_PORTSC(port)	((pch_is_lp() ? 0x510 : 0x570) + ((port) * 0x10)) + #define  XHCI_USB3_PORTSC_CHST	(0x7f << 17) + #define  XHCI_USB3_PORTSC_WCE	(1 << 25)	/* Wake on Connect */ +@@ -320,6 +368,7 @@ void mainboard_config_rcba(void); + #define  XHCI_USB3_PORTSC_WOE	(1 << 27)	/* Wake on Overcurrent */ + #define  XHCI_USB3_PORTSC_WRC	(1 << 19)	/* Warm Reset Complete */ + #define  XHCI_USB3_PORTSC_LWS	(1 << 16)	/* Link Write Strobe */ ++#define  XHCI_USB3_PORTSC_PR	(1 << 4)	/* Port Reset */ + #define  XHCI_USB3_PORTSC_PED	(1 << 1)	/* Port Enabled/Disabled */ + #define  XHCI_USB3_PORTSC_WPR	(1 << 31)	/* Warm Port Reset */ + #define  XHCI_USB3_PORTSC_PLS	(0xf << 5)	/* Port Link State */ +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch b/config/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch new file mode 100644 index 00000000..157d2999 --- /dev/null +++ b/config/coreboot/haswell/patches/0006-sb-intel-lynxpoint-Add-native-thermal-init.patch @@ -0,0 +1,128 @@ +From 92be49d8422b4bc1c89bb49535f4dc6a01d47295 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Fri, 6 May 2022 23:22:11 +0200 +Subject: [PATCH 06/26] sb/intel/lynxpoint: Add native thermal init + +Implement native thermal initialisation for Lynx Point. This is only +needed when MRC.bin is not used. 
+ +Change-Id: I4a67a3092d0c2e56bfdacb513a899ef838193cbd +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../haswell/native_raminit/raminit_native.c   |  1 + + src/southbridge/intel/lynxpoint/Makefile.inc  |  2 +- + src/southbridge/intel/lynxpoint/pch.h         |  1 + + src/southbridge/intel/lynxpoint/thermal.c     | 64 +++++++++++++++++++ + 4 files changed, 67 insertions(+), 1 deletion(-) + create mode 100644 src/southbridge/intel/lynxpoint/thermal.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +index ef61d4ee09..dd1f1ec14e 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +@@ -16,6 +16,7 @@ static bool early_init_native(int s3resume) + 	/** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/ + 	const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check(); +  ++	early_thermal_init(); + 	early_usb_init(); +  + 	if (!CONFIG(INTEL_LYNXPOINT_LP)) +diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc +index 0e1f2fe4eb..a9a9b153d6 100644 +--- a/src/southbridge/intel/lynxpoint/Makefile.inc ++++ b/src/southbridge/intel/lynxpoint/Makefile.inc +@@ -37,7 +37,7 @@ bootblock-y += early_pch.c + romstage-y += early_usb.c early_me.c me_status.c early_pch.c + romstage-y += pmutil.c +  +-romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c ++romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c thermal.c +  + ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y) + romstage-y += lp_gpio.c +diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h +index ad983d86cf..38a9349220 100644 +--- a/src/southbridge/intel/lynxpoint/pch.h ++++ b/src/southbridge/intel/lynxpoint/pch.h +@@ -116,6 +116,7 @@ enum pch_platform_type 
/*
 * Early thermal init, it may need to be done prior to giving ME its memory.
 *
 * Temporarily maps the thermal device's registers at TBARB_TEMP through the
 * PCI config registers at 0x40/0x44, programs the thermal registers in the
 * order recommended by the BWG text quoted below, then disables and clears
 * the temporary mapping again.
 */
void early_thermal_init(void)
{
	/* Program address for temporary BAR */
	pci_write_config32(THERMAL_DEV, 0x40, TBARB_TEMP);
	pci_write_config32(THERMAL_DEV, 0x44, 0);

	/* Activate temporary BAR */
	pci_or_config32(THERMAL_DEV, 0x40, 1);

	/*
	 * BWG section 17.3.1 says:
	 *
	 * ### Initializing Lynx Point Thermal Sensors ###
	 *
	 * The System BIOS must perform the following steps to initialize the Lynx
	 * Point thermal subsystem device, D31:F6. The System BIOS is required to
	 * repeat this process on a resume from Sx. BIOS may enable any or all of
	 * the registers below based on OEM's platform configuration. Intel does
	 * not recommend a value on some of the registers, since each platform has
	 * different temperature trip points and one may enable a trip to cause an
	 * SMI while another platform would cause an interrupt instead.
	 *
	 * The recommended flow for enabling thermal sensor is by setting up various
	 * temperature trip points first, followed by enabling the desired trip
	 * alert method and then enable the actual sensors from TSEL registers.
	 * If this flow is not followed, software will need to take special care
	 * to handle false events during setting up those registers.
	 */

	/* Step 1: Program CTT */
	write16p(TBARB_TEMP + 0x10, 0x0154);

	/* Step 2: Clear trip status from TSS and TAS */
	write8p(TBARB_TEMP + 0x06, 0xff);
	write8p(TBARB_TEMP + 0x80, 0xff);

	/* Step 3: Program TSGPEN and TSPIEN to zero */
	write8p(TBARB_TEMP + 0x84, 0x00);
	write8p(TBARB_TEMP + 0x82, 0x00);

	/*
	 * Step 4: If thermal reporting to an EC over SMBus is supported,
	 *         then write 0x01 to TSREL, else leave at default.
	 *
	 * NOTE(review): the write below is unconditional; no check for
	 * EC/SMBus thermal reporting support is made here.
	 */
	write8p(TBARB_TEMP + 0x0a, 0x01);

	/* Disable temporary BAR */
	pci_and_config32(THERMAL_DEV, 0x40, ~1);

	/* Clear temporary BAR address */
	pci_write_config32(THERMAL_DEV, 0x40, 0);
}
+ +Change-Id: I36867bdc8b20000e44ff9d0d7b2c0d63952bd561 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../haswell/native_raminit/raminit_native.c   |   3 +- + src/southbridge/intel/lynxpoint/Makefile.inc  |   1 + + .../intel/lynxpoint/early_pch_native.c        | 123 +++++++++ + .../intel/lynxpoint/hsio/Makefile.inc         |   8 + + src/southbridge/intel/lynxpoint/hsio/common.c |  52 ++++ + src/southbridge/intel/lynxpoint/hsio/hsio.h   |  46 ++++ + .../intel/lynxpoint/hsio/lpt_h_cx.c           | 244 ++++++++++++++++++ + .../intel/lynxpoint/hsio/lpt_lp_bx.c          | 180 +++++++++++++ + src/southbridge/intel/lynxpoint/pch.h         |   6 + + 9 files changed, 661 insertions(+), 2 deletions(-) + create mode 100644 src/southbridge/intel/lynxpoint/hsio/Makefile.inc + create mode 100644 src/southbridge/intel/lynxpoint/hsio/common.c + create mode 100644 src/southbridge/intel/lynxpoint/hsio/hsio.h + create mode 100644 src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c + create mode 100644 src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +index dd1f1ec14e..b6efb6b40d 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +@@ -16,8 +16,7 @@ static bool early_init_native(int s3resume) + 	/** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/ + 	const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check(); +  +-	early_thermal_init(); +-	early_usb_init(); ++	early_pch_init_native(s3resume); +  + 	if (!CONFIG(INTEL_LYNXPOINT_LP)) + 		dmi_early_init(); +diff --git a/src/southbridge/intel/lynxpoint/Makefile.inc b/src/southbridge/intel/lynxpoint/Makefile.inc +index a9a9b153d6..63243ecc86 100644 +--- a/src/southbridge/intel/lynxpoint/Makefile.inc ++++ b/src/southbridge/intel/lynxpoint/Makefile.inc +@@ 
-38,6 +38,7 @@ romstage-y += early_usb.c early_me.c me_status.c early_pch.c + romstage-y += pmutil.c +  + romstage-$(CONFIG_USE_NATIVE_RAMINIT) += early_pch_native.c early_usb_native.c iobp.c thermal.c ++subdirs-$(CONFIG_USE_NATIVE_RAMINIT) += hsio +  + ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y) + romstage-y += lp_gpio.c +diff --git a/src/southbridge/intel/lynxpoint/early_pch_native.c b/src/southbridge/intel/lynxpoint/early_pch_native.c +index c28ddfcf5d..421821fa5d 100644 +--- a/src/southbridge/intel/lynxpoint/early_pch_native.c ++++ b/src/southbridge/intel/lynxpoint/early_pch_native.c +@@ -1,10 +1,133 @@ + /* SPDX-License-Identifier: GPL-2.0-or-later */ +  + #include <console/console.h> ++#include <device/pci_def.h> + #include <device/pci_ops.h> ++#include <southbridge/intel/lynxpoint/hsio/hsio.h> + #include <southbridge/intel/lynxpoint/pch.h> + #include <types.h> +  ++static void early_sata_init(const uint8_t pch_revision) ++{ ++	const bool is_mobile = get_pch_platform_type() != PCH_TYPE_DESKTOP; ++ ++	const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410); ++	printk(BIOS_DEBUG, "HSIO lane owner: 0x%02x\n", lane_owner); ++ ++	/* BWG Step 2 */ ++	pci_update_config32(PCH_SATA_DEV, SATA_SCLKG, ~0x1ff, 0x183); ++ ++	/* BWG Step 3: Set OOB Retry Mode */ ++	pci_or_config16(PCH_SATA_DEV, SATA_PCS, 1 << 15); ++ ++	/* BWG Step 4: Program the SATA mPHY tables */ ++	if (pch_is_lp()) { ++		if (pch_revision >= LPT_LP_STEP_B0 && pch_revision <= LPT_LP_STEP_B2) { ++			program_hsio_sata_lpt_lp_bx(is_mobile); ++		} else { ++			printk(BIOS_ERR, "Unsupported PCH-LP stepping 0x%02x\n", pch_revision); ++		} ++	} else { ++		if (pch_revision >= LPT_H_STEP_C0) { ++			program_hsio_sata_lpt_h_cx(is_mobile); ++		} else { ++			printk(BIOS_ERR, "Unsupported PCH-H stepping 0x%02x\n", pch_revision); ++		} ++	} ++ ++	/** FIXME: Program SATA RxEq tables **/ ++ ++	/* BWG Step 5 */ ++	/** FIXME: Only for desktop and mobile (skip this on workstation and server) **/ ++	
pci_or_config32(PCH_SATA_DEV, 0x98, BIT(22)); ++ ++	/* BWG Step 6 */ ++	pci_or_config32(PCH_SATA_DEV, 0x98, BIT(19)); ++ ++	/* BWG Step 7 */ ++	pci_update_config32(PCH_SATA_DEV, 0x98, ~(0x3f << 7), 0x04 << 7); ++ ++	/* BWG Step 8 */ ++	pci_or_config32(PCH_SATA_DEV, 0x98, BIT(20)); ++ ++	/* BWG Step 9 */ ++	pci_update_config32(PCH_SATA_DEV, 0x98, ~(3 << 5), 1 << 5); ++ ++	/* BWG Step 10 */ ++	pci_or_config32(PCH_SATA_DEV, 0x98, BIT(18)); ++ ++	/* Enable SATA ports */ ++	uint8_t sata_pcs = 0; ++	if (CONFIG(INTEL_LYNXPOINT_LP)) { ++		for (uint8_t i = 0; i < 4; i++) { ++			if ((lane_owner & BIT(7 - i)) == 0) { ++				sata_pcs |= BIT(i); ++			} ++		} ++	} else { ++		sata_pcs |= 0x0f; ++		for (uint8_t i = 4; i < 6; i++) { ++			if ((lane_owner & BIT(i)) == 0) { ++				sata_pcs |= BIT(i); ++			} ++		} ++	} ++	printk(BIOS_DEBUG, "SATA port enables: 0x%02x\n", sata_pcs); ++	pci_or_config8(PCH_SATA_DEV, SATA_PCS, sata_pcs); ++} ++ ++void early_pch_init_native(int s3resume) ++{ ++	const uint8_t pch_revision = pci_read_config8(PCH_LPC_DEV, PCI_REVISION_ID); ++ ++	RCBA16(DISPBDF) = 0x0010; ++	RCBA32_OR(FD2, PCH_ENABLE_DBDF); ++ ++	/** FIXME: Check GEN_PMCON_3 and handle RTC failure? **/ ++ ++	RCBA32(PRSTS) = BIT(4); ++ ++	early_sata_init(pch_revision); ++ ++	pci_or_config8(PCH_LPC_DEV, 0xa6, 1 << 1); ++	pci_and_config8(PCH_LPC_DEV, 0xdc, ~(1 << 5 | 1 << 1)); ++ ++	/** TODO: Send GET HSIO VER and update ChipsetInit table? Is it needed? **/ ++ ++	/** FIXME: GbE handling? **/ ++ ++	pci_update_config32(PCH_LPC_DEV, 0xac, ~(1 << 20), 0); ++ ++	for (uint8_t i = 0; i < 8; i++) ++		pci_update_config32(PCI_DEV(0, 0x1c, i), 0x338, ~(1 << 26), 0); ++ ++	pci_update_config8(PCI_DEV(0, 0x1c, 0), 0xf4, ~(3 << 5), 1 << 7); ++ ++	pci_update_config8(PCI_DEV(0, 26, 0), 0x88, ~(1 << 2), 0); ++	pci_update_config8(PCI_DEV(0, 29, 0), 0x88, ~(1 << 2), 0); ++ ++	/** FIXME: Disable SATA2 device? 
**/ ++ ++	if (pch_is_lp()) { ++		if (pch_revision >= LPT_LP_STEP_B0 && pch_revision <= LPT_LP_STEP_B2) { ++			program_hsio_xhci_lpt_lp_bx(); ++			program_hsio_igbe_lpt_lp_bx(); ++		} else { ++			printk(BIOS_ERR, "Unsupported PCH-LP stepping 0x%02x\n", pch_revision); ++		} ++	} else { ++		if (pch_revision >= LPT_H_STEP_C0) { ++			program_hsio_xhci_lpt_h_cx(); ++			program_hsio_igbe_lpt_h_cx(); ++		} else { ++			printk(BIOS_ERR, "Unsupported PCH-H stepping 0x%02x\n", pch_revision); ++		} ++	} ++ ++	early_thermal_init(); ++	early_usb_init(); ++} ++ + void pch_dmi_setup_physical_layer(void) + { + 	/* FIXME: We need to make sure the SA supports Gen2 as well */ +diff --git a/src/southbridge/intel/lynxpoint/hsio/Makefile.inc b/src/southbridge/intel/lynxpoint/hsio/Makefile.inc +new file mode 100644 +index 0000000000..6b74997511 +--- /dev/null ++++ b/src/southbridge/intel/lynxpoint/hsio/Makefile.inc +@@ -0,0 +1,8 @@ ++## SPDX-License-Identifier: GPL-2.0-or-later ++ ++romstage-y += common.c ++ifeq ($(CONFIG_INTEL_LYNXPOINT_LP),y) ++romstage-y += lpt_lp_bx.c ++else ++romstage-y += lpt_h_cx.c ++endif +diff --git a/src/southbridge/intel/lynxpoint/hsio/common.c b/src/southbridge/intel/lynxpoint/hsio/common.c +new file mode 100644 +index 0000000000..9935ca347a +--- /dev/null ++++ b/src/southbridge/intel/lynxpoint/hsio/common.c +@@ -0,0 +1,52 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <device/pci_ops.h> ++#include <southbridge/intel/lynxpoint/hsio/hsio.h> ++#include <types.h> ++ ++/* ++ * FIXME: Ask Intel whether all lanes need to be programmed as specified ++ * in the PCH BWG. If not, make separate tables and only check this once. 
++ */ ++void hsio_sata_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or) ++{ ++	const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410); ++ ++	if ((addr & 0xfe00) == 0x2000 && (lane_owner & (1 << 4))) ++		return; ++ ++	if ((addr & 0xfe00) == 0x2200 && (lane_owner & (1 << 5))) ++		return; ++ ++	if (CONFIG(INTEL_LYNXPOINT_LP)) { ++		if ((addr & 0xfe00) == 0x2400 && (lane_owner & (1 << 6))) ++			return; ++ ++		if ((addr & 0xfe00) == 0x2600 && (lane_owner & (1 << 7))) ++			return; ++	} ++	hsio_update(addr, and, or); ++} ++ ++/* ++ * FIXME: Ask Intel whether all lanes need to be programmed as specified ++ * in the PCH BWG. If not, make separate tables and only check this once. ++ */ ++void hsio_xhci_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or) ++{ ++	const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410); ++	if (CONFIG(INTEL_LYNXPOINT_LP)) { ++		if ((addr & 0xfe00) == 0x2400 && ((lane_owner >> 0) & 3) != 2) ++			return; ++ ++		if ((addr & 0xfe00) == 0x2600 && ((lane_owner >> 2) & 3) != 2) ++			return; ++	} else { ++		if ((addr & 0xfe00) == 0x2c00 && ((lane_owner >> 2) & 3) != 2) ++			return; ++ ++		if ((addr & 0xfe00) == 0x2e00 && ((lane_owner >> 0) & 3) != 2) ++			return; ++	} ++	hsio_update(addr, and, or); ++} +diff --git a/src/southbridge/intel/lynxpoint/hsio/hsio.h b/src/southbridge/intel/lynxpoint/hsio/hsio.h +new file mode 100644 +index 0000000000..689ef4a05b +--- /dev/null ++++ b/src/southbridge/intel/lynxpoint/hsio/hsio.h +@@ -0,0 +1,46 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#ifndef SOUTHBRIDGE_INTEL_LYNXPOINT_HSIO_H ++#define SOUTHBRIDGE_INTEL_LYNXPOINT_HSIO_H ++ ++#include <southbridge/intel/lynxpoint/iobp.h> ++#include <types.h> ++ ++struct hsio_table_row { ++	uint32_t addr; ++	uint32_t and; ++	uint32_t or; ++}; ++ ++static inline void hsio_update(const uint32_t addr, const uint32_t and, const uint32_t or) ++{ ++	pch_iobp_update(addr, and, or); ++} ++ 
++static inline void hsio_update_row(const struct hsio_table_row row) ++{ ++	hsio_update(row.addr, row.and, row.or); ++} ++ ++void hsio_xhci_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or); ++void hsio_sata_shared_update(const uint32_t addr, const uint32_t and, const uint32_t or); ++ ++static inline void hsio_sata_shared_update_row(const struct hsio_table_row row) ++{ ++	hsio_sata_shared_update(row.addr, row.and, row.or); ++} ++ ++static inline void hsio_xhci_shared_update_row(const struct hsio_table_row row) ++{ ++	hsio_xhci_shared_update(row.addr, row.and, row.or); ++} ++ ++void program_hsio_sata_lpt_h_cx(const bool is_mobile); ++void program_hsio_xhci_lpt_h_cx(void); ++void program_hsio_igbe_lpt_h_cx(void); ++ ++void program_hsio_sata_lpt_lp_bx(const bool is_mobile); ++void program_hsio_xhci_lpt_lp_bx(void); ++void program_hsio_igbe_lpt_lp_bx(void); ++ ++#endif +diff --git a/src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c b/src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c +new file mode 100644 +index 0000000000..b5dd402742 +--- /dev/null ++++ b/src/southbridge/intel/lynxpoint/hsio/lpt_h_cx.c +@@ -0,0 +1,244 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <device/pci_ops.h> ++#include <southbridge/intel/lynxpoint/hsio/hsio.h> ++#include <types.h> ++ ++const struct hsio_table_row hsio_sata_shared_lpt_h_cx[] = { ++	{ 0xea002008, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea002208, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea002038, ~0x3f00000f, 0x0700000d }, ++	{ 0xea002238, ~0x3f00000f, 0x0700000d }, ++	{ 0xea00202c, ~0x00020f00, 0x00020100 }, ++	{ 0xea00222c, ~0x00020f00, 0x00020100 }, ++	{ 0xea002040, ~0x1f000000, 0x01000000 }, ++	{ 0xea002240, ~0x1f000000, 0x01000000 }, ++	{ 0xea002010, ~0xffff0000, 0x0d510000 }, ++	{ 0xea002210, ~0xffff0000, 0x0d510000 }, ++	{ 0xea002018, ~0xffff0300, 0x38250100 }, ++	{ 0xea002218, ~0xffff0300, 0x38250100 }, ++	{ 0xea002000, ~0xcf030000, 0xcf030000 }, ++	{ 0xea002200, ~0xcf030000, 0xcf030000 }, ++	{ 
0xea002028, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea002228, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea00201c, ~0x00007c00, 0x00002400 }, ++	{ 0xea00221c, ~0x00007c00, 0x00002400 }, ++	{ 0xea00208c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea00228c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea0020a4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea0022a4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea0020ac, ~0x00000030, 0x00000020 }, ++	{ 0xea0022ac, ~0x00000030, 0x00000020 }, ++	{ 0xea002140, ~0x00ffffff, 0x00140718 }, ++	{ 0xea002340, ~0x00ffffff, 0x00140718 }, ++	{ 0xea002144, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002344, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002148, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002348, ~0x00ffffff, 0x00140998 }, ++	{ 0xea00217c, ~0x03000000, 0x03000000 }, ++	{ 0xea00237c, ~0x03000000, 0x03000000 }, ++	{ 0xea002178, ~0x00001f00, 0x00001800 }, ++	{ 0xea002378, ~0x00001f00, 0x00001800 }, ++	{ 0xea00210c, ~0x0038000f, 0x00000005 }, ++	{ 0xea00230c, ~0x0038000f, 0x00000005 }, ++}; ++ ++const struct hsio_table_row hsio_sata_lpt_h_cx[] = { ++	{ 0xea008008, ~0xff000000, 0x1c000000 }, ++	{ 0xea002408, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea002608, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea000808, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea000a08, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea002438, ~0x3f00000f, 0x0700000d }, ++	{ 0xea002638, ~0x3f00000f, 0x0700000d }, ++	{ 0xea000838, ~0x3f00000f, 0x0700000d }, ++	{ 0xea000a38, ~0x3f00000f, 0x0700000d }, ++	{ 0xea002440, ~0x1f000000, 0x01000000 }, ++	{ 0xea002640, ~0x1f000000, 0x01000000 }, ++	{ 0xea000840, ~0x1f000000, 0x01000000 }, ++	{ 0xea000a40, ~0x1f000000, 0x01000000 }, ++	{ 0xea002410, ~0xffff0000, 0x0d510000 }, ++	{ 0xea002610, ~0xffff0000, 0x0d510000 }, ++	{ 0xea000810, ~0xffff0000, 0x0d510000 }, ++	{ 0xea000a10, ~0xffff0000, 0x0d510000 }, ++	{ 0xea00242c, ~0x00020800, 0x00020000 }, ++	{ 0xea00262c, ~0x00020800, 0x00020000 }, ++	{ 0xea00082c, ~0x00020800, 0x00020000 }, ++	{ 0xea000a2c, ~0x00020800, 0x00020000 }, ++	{ 0xea002418, ~0xffff0300, 0x38250100 }, ++	{ 
0xea002618, ~0xffff0300, 0x38250100 }, ++	{ 0xea000818, ~0xffff0300, 0x38250100 }, ++	{ 0xea000a18, ~0xffff0300, 0x38250100 }, ++	{ 0xea002400, ~0xcf030000, 0xcf030000 }, ++	{ 0xea002600, ~0xcf030000, 0xcf030000 }, ++	{ 0xea000800, ~0xcf030000, 0xcf030000 }, ++	{ 0xea000a00, ~0xcf030000, 0xcf030000 }, ++	{ 0xea002428, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea002628, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea000828, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea000a28, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea00241c, ~0x00007c00, 0x00002400 }, ++	{ 0xea00261c, ~0x00007c00, 0x00002400 }, ++	{ 0xea00081c, ~0x00007c00, 0x00002400 }, ++	{ 0xea000a1c, ~0x00007c00, 0x00002400 }, ++	{ 0xea00248c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea00268c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea00088c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea000a8c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea0024a4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea0026a4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea0008a4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea000aa4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea0024ac, ~0x00000030, 0x00000020 }, ++	{ 0xea0026ac, ~0x00000030, 0x00000020 }, ++	{ 0xea0008ac, ~0x00000030, 0x00000020 }, ++	{ 0xea000aac, ~0x00000030, 0x00000020 }, ++	{ 0xea002540, ~0x00ffffff, 0x00140718 }, ++	{ 0xea002740, ~0x00ffffff, 0x00140718 }, ++	{ 0xea000940, ~0x00ffffff, 0x00140718 }, ++	{ 0xea000b40, ~0x00ffffff, 0x00140718 }, ++	{ 0xea002544, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002744, ~0x00ffffff, 0x00140998 }, ++	{ 0xea000944, ~0x00ffffff, 0x00140998 }, ++	{ 0xea000b44, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002548, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002748, ~0x00ffffff, 0x00140998 }, ++	{ 0xea000948, ~0x00ffffff, 0x00140998 }, ++	{ 0xea000b48, ~0x00ffffff, 0x00140998 }, ++	{ 0xea00257c, ~0x03000000, 0x03000000 }, ++	{ 0xea00277c, ~0x03000000, 0x03000000 }, ++	{ 0xea00097c, ~0x03000000, 0x03000000 }, ++	{ 0xea000b7c, ~0x03000000, 0x03000000 }, ++	{ 0xea002578, ~0x00001f00, 0x00001800 }, ++	{ 0xea002778, ~0x00001f00, 0x00001800 }, ++	{ 0xea000978, 
~0x00001f00, 0x00001800 }, ++	{ 0xea000b78, ~0x00001f00, 0x00001800 }, ++	{ 0xea00250c, ~0x0038000f, 0x00000005 }, ++	{ 0xea00270c, ~0x0038000f, 0x00000005 }, ++	{ 0xea00090c, ~0x0038000f, 0x00000005 }, ++	{ 0xea000b0c, ~0x0038000f, 0x00000005 }, ++}; ++ ++const struct hsio_table_row hsio_xhci_shared_lpt_h_cx[] = { ++	{ 0xe9002c2c, ~0x00000700, 0x00000100 }, ++	{ 0xe9002e2c, ~0x00000700, 0x00000100 }, ++	{ 0xe9002dcc, ~0x00001407, 0x00001407 }, ++	{ 0xe9002fcc, ~0x00001407, 0x00001407 }, ++	{ 0xe9002d68, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe9002f68, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe9002d6c, ~0x000000ff, 0x0000003f }, ++	{ 0xe9002f6c, ~0x000000ff, 0x0000003f }, ++	{ 0xe9002d4c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe9002f4c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe9002d14, ~0x38000700, 0x00000100 }, ++	{ 0xe9002f14, ~0x38000700, 0x00000100 }, ++	{ 0xe9002d64, ~0x0000f000, 0x00005000 }, ++	{ 0xe9002f64, ~0x0000f000, 0x00005000 }, ++	{ 0xe9002d70, ~0x00000018, 0x00000000 }, ++	{ 0xe9002f70, ~0x00000018, 0x00000000 }, ++	{ 0xe9002c38, ~0x3f00000f, 0x0700000b }, ++	{ 0xe9002e38, ~0x3f00000f, 0x0700000b }, ++	{ 0xe9002d40, ~0x00800000, 0x00000000 }, ++	{ 0xe9002f40, ~0x00800000, 0x00000000 }, ++}; ++ ++const struct hsio_table_row hsio_xhci_lpt_h_cx[] = { ++	{ 0xe90031cc, ~0x00001407, 0x00001407 }, ++	{ 0xe90033cc, ~0x00001407, 0x00001407 }, ++	{ 0xe90015cc, ~0x00001407, 0x00001407 }, ++	{ 0xe90017cc, ~0x00001407, 0x00001407 }, ++	{ 0xe9003168, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe9003368, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe9001568, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe9001768, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe900316c, ~0x000000ff, 0x0000003f }, ++	{ 0xe900336c, ~0x000000ff, 0x0000003f }, ++	{ 0xe900156c, ~0x000000ff, 0x0000003f }, ++	{ 0xe900176c, ~0x000000ff, 0x0000003f }, ++	{ 0xe900314c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe900334c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe900154c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe900174c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe9003114, ~0x38000700, 
0x00000100 }, ++	{ 0xe9003314, ~0x38000700, 0x00000100 }, ++	{ 0xe9001514, ~0x38000700, 0x00000100 }, ++	{ 0xe9001714, ~0x38000700, 0x00000100 }, ++	{ 0xe9003164, ~0x0000f000, 0x00005000 }, ++	{ 0xe9003364, ~0x0000f000, 0x00005000 }, ++	{ 0xe9001564, ~0x0000f000, 0x00005000 }, ++	{ 0xe9001764, ~0x0000f000, 0x00005000 }, ++	{ 0xe9003170, ~0x00000018, 0x00000000 }, ++	{ 0xe9003370, ~0x00000018, 0x00000000 }, ++	{ 0xe9001570, ~0x00000018, 0x00000000 }, ++	{ 0xe9001770, ~0x00000018, 0x00000000 }, ++	{ 0xe9003038, ~0x3f00000f, 0x0700000b }, ++	{ 0xe9003238, ~0x3f00000f, 0x0700000b }, ++	{ 0xe9001438, ~0x3f00000f, 0x0700000b }, ++	{ 0xe9001638, ~0x3f00000f, 0x0700000b }, ++	{ 0xe9003140, ~0x00800000, 0x00000000 }, ++	{ 0xe9003340, ~0x00800000, 0x00000000 }, ++	{ 0xe9001540, ~0x00800000, 0x00000000 }, ++	{ 0xe9001740, ~0x00800000, 0x00000000 }, ++}; ++ ++void program_hsio_sata_lpt_h_cx(const bool is_mobile) ++{ ++	const struct hsio_table_row *pch_hsio_table; ++	size_t len; ++ ++	pch_hsio_table = hsio_sata_lpt_h_cx; ++	len = ARRAY_SIZE(hsio_sata_lpt_h_cx); ++	for (size_t i = 0; i < len; i++) ++		hsio_update_row(pch_hsio_table[i]); ++ ++	pch_hsio_table = hsio_sata_shared_lpt_h_cx; ++	len = ARRAY_SIZE(hsio_sata_shared_lpt_h_cx); ++	for (size_t i = 0; i < len; i++) ++		hsio_sata_shared_update_row(pch_hsio_table[i]); ++ ++	const uint32_t hsio_sata_value = is_mobile ? 
0x00004c5a : 0x00003e67; ++ ++	hsio_update(0xea002490, ~0x0000ffff, hsio_sata_value); ++	hsio_update(0xea002690, ~0x0000ffff, hsio_sata_value); ++	hsio_update(0xea000890, ~0x0000ffff, hsio_sata_value); ++	hsio_update(0xea000a90, ~0x0000ffff, hsio_sata_value); ++ ++	hsio_sata_shared_update(0xea002090, ~0x0000ffff, hsio_sata_value); ++	hsio_sata_shared_update(0xea002290, ~0x0000ffff, hsio_sata_value); ++} ++ ++void program_hsio_xhci_lpt_h_cx(void) ++{ ++	const struct hsio_table_row *pch_hsio_table; ++	size_t len; ++ ++	pch_hsio_table = hsio_xhci_lpt_h_cx; ++	len = ARRAY_SIZE(hsio_xhci_lpt_h_cx); ++ ++	for (size_t i = 0; i < len; i++) ++		hsio_update_row(pch_hsio_table[i]); ++ ++	pch_hsio_table = hsio_xhci_shared_lpt_h_cx; ++	len = ARRAY_SIZE(hsio_xhci_shared_lpt_h_cx); ++ ++	for (size_t i = 0; i < len; i++) ++		hsio_xhci_shared_update_row(pch_hsio_table[i]); ++} ++ ++void program_hsio_igbe_lpt_h_cx(void) ++{ ++	const uint32_t strpfusecfg1 = pci_read_config32(PCI_DEV(0, 0x1c, 0), 0xfc); ++	if (!(strpfusecfg1 & (1 << 19))) ++		return; ++ ++	const uint8_t gbe_port = (strpfusecfg1 >> 16) & 0x7; ++	const uint8_t lane_owner = pci_read_config8(PCI_DEV(0, 0x1c, 0), 0x410); ++	if (gbe_port == 0 && ((lane_owner >> 0) & 3) != 1) ++		return; ++ ++	if (gbe_port == 1 && ((lane_owner >> 2) & 3) != 1) ++		return; ++ ++	const uint32_t gbe_hsio_base = 0xe900 << 16 | (0x2e - 2 * gbe_port) << 8; ++	hsio_update(gbe_hsio_base + 0x08, ~0xf0000100, 0xe0000100); ++} +diff --git a/src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c b/src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c +new file mode 100644 +index 0000000000..24679e791a +--- /dev/null ++++ b/src/southbridge/intel/lynxpoint/hsio/lpt_lp_bx.c +@@ -0,0 +1,180 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <device/pci_ops.h> ++#include <southbridge/intel/lynxpoint/iobp.h> ++#include <southbridge/intel/lynxpoint/hsio/hsio.h> ++#include <types.h> ++ ++const struct hsio_table_row hsio_sata_shared_lpt_lp_bx[] = { ++	{ 
0xea008008, ~0xff000000, 0x1c000000 }, ++	{ 0xea002008, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea002208, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea002408, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea002608, ~0xfffc6108, 0xea6c6108 }, ++	{ 0xea002038, ~0x0000000f, 0x0000000d }, ++	{ 0xea002238, ~0x0000000f, 0x0000000d }, ++	{ 0xea002438, ~0x0000000f, 0x0000000d }, ++	{ 0xea002638, ~0x0000000f, 0x0000000d }, ++	{ 0xea00202c, ~0x00020f00, 0x00020100 }, ++	{ 0xea00222c, ~0x00020f00, 0x00020100 }, ++	{ 0xea00242c, ~0x00020f00, 0x00020100 }, ++	{ 0xea00262c, ~0x00020f00, 0x00020100 }, ++	{ 0xea002040, ~0x1f000000, 0x01000000 }, ++	{ 0xea002240, ~0x1f000000, 0x01000000 }, ++	{ 0xea002440, ~0x1f000000, 0x01000000 }, ++	{ 0xea002640, ~0x1f000000, 0x01000000 }, ++	{ 0xea002010, ~0xffff0000, 0x55510000 }, ++	{ 0xea002210, ~0xffff0000, 0x55510000 }, ++	{ 0xea002410, ~0xffff0000, 0x55510000 }, ++	{ 0xea002610, ~0xffff0000, 0x55510000 }, ++	{ 0xea002140, ~0x00ffffff, 0x00140718 }, ++	{ 0xea002340, ~0x00ffffff, 0x00140718 }, ++	{ 0xea002540, ~0x00ffffff, 0x00140718 }, ++	{ 0xea002740, ~0x00ffffff, 0x00140718 }, ++	{ 0xea002144, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002344, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002544, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002744, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002148, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002348, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002548, ~0x00ffffff, 0x00140998 }, ++	{ 0xea002748, ~0x00ffffff, 0x00140998 }, ++	{ 0xea00217c, ~0x03000000, 0x03000000 }, ++	{ 0xea00237c, ~0x03000000, 0x03000000 }, ++	{ 0xea00257c, ~0x03000000, 0x03000000 }, ++	{ 0xea00277c, ~0x03000000, 0x03000000 }, ++	{ 0xea00208c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea00228c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea00248c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea00268c, ~0x00ff0000, 0x00800000 }, ++	{ 0xea0020a4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea0022a4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea0024a4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea0026a4, ~0x0030ff00, 0x00308300 }, ++	{ 0xea0020ac, 
~0x00000030, 0x00000020 }, ++	{ 0xea0022ac, ~0x00000030, 0x00000020 }, ++	{ 0xea0024ac, ~0x00000030, 0x00000020 }, ++	{ 0xea0026ac, ~0x00000030, 0x00000020 }, ++	{ 0xea002018, ~0xffff0300, 0x38250100 }, ++	{ 0xea002218, ~0xffff0300, 0x38250100 }, ++	{ 0xea002418, ~0xffff0300, 0x38250100 }, ++	{ 0xea002618, ~0xffff0300, 0x38250100 }, ++	{ 0xea002000, ~0xcf030000, 0xcf030000 }, ++	{ 0xea002200, ~0xcf030000, 0xcf030000 }, ++	{ 0xea002400, ~0xcf030000, 0xcf030000 }, ++	{ 0xea002600, ~0xcf030000, 0xcf030000 }, ++	{ 0xea002028, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea002228, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea002428, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea002628, ~0xff1f0000, 0x580e0000 }, ++	{ 0xea00201c, ~0x00007c00, 0x00002400 }, ++	{ 0xea00221c, ~0x00007c00, 0x00002400 }, ++	{ 0xea00241c, ~0x00007c00, 0x00002400 }, ++	{ 0xea00261c, ~0x00007c00, 0x00002400 }, ++	{ 0xea002178, ~0x00001f00, 0x00001800 }, ++	{ 0xea002378, ~0x00001f00, 0x00001800 }, ++	{ 0xea002578, ~0x00001f00, 0x00001800 }, ++	{ 0xea002778, ~0x00001f00, 0x00001800 }, ++	{ 0xea00210c, ~0x0038000f, 0x00000005 }, ++	{ 0xea00230c, ~0x0038000f, 0x00000005 }, ++	{ 0xea00250c, ~0x0038000f, 0x00000005 }, ++	{ 0xea00270c, ~0x0038000f, 0x00000005 }, ++}; ++ ++const struct hsio_table_row hsio_xhci_shared_lpt_lp_bx[] = { ++	{ 0xe90025cc, ~0x00001407, 0x00001407 }, ++	{ 0xe90027cc, ~0x00001407, 0x00001407 }, ++	{ 0xe9002568, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe9002768, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe900242c, ~0x00000700, 0x00000100 }, ++	{ 0xe900262c, ~0x00000700, 0x00000100 }, ++	{ 0xe900256c, ~0x000000ff, 0x0000003f }, ++	{ 0xe900276c, ~0x000000ff, 0x0000003f }, ++	{ 0xe900254c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe900274c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe9002564, ~0x0000f000, 0x00005000 }, ++	{ 0xe9002764, ~0x0000f000, 0x00005000 }, ++	{ 0xe9002570, ~0x00000018, 0x00000000 }, ++	{ 0xe9002770, ~0x00000018, 0x00000000 }, ++	{ 0xe9002514, ~0x38000700, 0x00000100 }, ++	{ 0xe9002714, ~0x38000700, 0x00000100 }, ++	{ 
0xe9002438, ~0x0000000f, 0x0000000b }, ++	{ 0xe9002638, ~0x0000000f, 0x0000000b }, ++	{ 0xe9002414, ~0x0000fe00, 0x00006600 }, ++	{ 0xe9002614, ~0x0000fe00, 0x00006600 }, ++	{ 0xe9002540, ~0x00800000, 0x00000000 }, ++	{ 0xe9002740, ~0x00800000, 0x00000000 }, ++}; ++ ++const struct hsio_table_row hsio_xhci_lpt_lp_bx[] = { ++	{ 0xe90021cc, ~0x00001407, 0x00001407 }, ++	{ 0xe90023cc, ~0x00001407, 0x00001407 }, ++	{ 0xe9002168, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe9002368, ~0x01000f3c, 0x00000a28 }, ++	{ 0xe900216c, ~0x000000ff, 0x0000003f }, ++	{ 0xe900236c, ~0x000000ff, 0x0000003f }, ++	{ 0xe900214c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe900234c, ~0x00ffff00, 0x00120500 }, ++	{ 0xe9002164, ~0x0000f000, 0x00005000 }, ++	{ 0xe9002364, ~0x0000f000, 0x00005000 }, ++	{ 0xe9002170, ~0x00000018, 0x00000000 }, ++	{ 0xe9002370, ~0x00000018, 0x00000000 }, ++	{ 0xe9002114, ~0x38000700, 0x00000100 }, ++	{ 0xe9002314, ~0x38000700, 0x00000100 }, ++	{ 0xe9002038, ~0x0000000f, 0x0000000b }, ++	{ 0xe9002238, ~0x0000000f, 0x0000000b }, ++	{ 0xe9002014, ~0x0000fe00, 0x00006600 }, ++	{ 0xe9002214, ~0x0000fe00, 0x00006600 }, ++	{ 0xe9002140, ~0x00800000, 0x00000000 }, ++	{ 0xe9002340, ~0x00800000, 0x00000000 }, ++}; ++ ++void program_hsio_sata_lpt_lp_bx(const bool is_mobile) ++{ ++	const struct hsio_table_row *pch_hsio_table; ++	size_t len; ++ ++	pch_hsio_table = hsio_sata_shared_lpt_lp_bx; ++	len = ARRAY_SIZE(hsio_sata_shared_lpt_lp_bx); ++	for (size_t i = 0; i < len; i++) ++		hsio_sata_shared_update_row(pch_hsio_table[i]); ++ ++	const uint32_t hsio_sata_value = is_mobile ? 
0x00004c5a : 0x00003e67; ++ ++	hsio_sata_shared_update(0xea002090, ~0x0000ffff, hsio_sata_value); ++	hsio_sata_shared_update(0xea002290, ~0x0000ffff, hsio_sata_value); ++	hsio_sata_shared_update(0xea002490, ~0x0000ffff, hsio_sata_value); ++	hsio_sata_shared_update(0xea002690, ~0x0000ffff, hsio_sata_value); ++} ++ ++void program_hsio_xhci_lpt_lp_bx(void) ++{ ++	const struct hsio_table_row *pch_hsio_table; ++	size_t len; ++ ++	pch_hsio_table = hsio_xhci_lpt_lp_bx; ++	len = ARRAY_SIZE(hsio_xhci_lpt_lp_bx); ++ ++	for (size_t i = 0; i < len; i++) ++		hsio_update_row(pch_hsio_table[i]); ++ ++	pch_hsio_table = hsio_xhci_shared_lpt_lp_bx; ++	len = ARRAY_SIZE(hsio_xhci_shared_lpt_lp_bx); ++ ++	for (size_t i = 0; i < len; i++) ++		hsio_xhci_shared_update_row(pch_hsio_table[i]); ++} ++ ++void program_hsio_igbe_lpt_lp_bx(void) ++{ ++	const uint32_t strpfusecfg1 = pci_read_config32(PCI_DEV(0, 0x1c, 0), 0xfc); ++	if (!(strpfusecfg1 & (1 << 19))) ++		return; ++ ++	const uint8_t gbe_port = (strpfusecfg1 >> 16) & 0x7; ++	if (gbe_port > 5) ++		return; ++ ++	const uint32_t gbe_hsio_base = 0xe900 << 16 | (0x08 + 2 * gbe_port) << 8; ++	hsio_update(gbe_hsio_base + 0x08, ~0xf0000100, 0xe0000100); ++} +diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h +index 38a9349220..74b4d50017 100644 +--- a/src/southbridge/intel/lynxpoint/pch.h ++++ b/src/southbridge/intel/lynxpoint/pch.h +@@ -117,6 +117,7 @@ void pch_dmi_setup_physical_layer(void); + void pch_dmi_tc_vc_mapping(u32 vc0, u32 vc1, u32 vcp, u32 vcm); + void early_usb_init(void); + void early_thermal_init(void); ++void early_pch_init_native(int s3resume); +  + void usb_ehci_sleep_prepare(pci_devfn_t dev, u8 slp_typ); + void usb_ehci_disable(pci_devfn_t dev); +@@ -271,6 +272,10 @@ void mainboard_config_rcba(void); + #define   IDE_DECODE_ENABLE	(1 << 15) + #define IDE_TIM_SEC		0x42	/* IDE timings, secondary */ +  ++#define SATA_MAP		0x90 ++#define SATA_PCS		0x92 ++#define SATA_SCLKG		0x94 ++ + 
#define SATA_SIRI		0xa0 /* SATA Indexed Register Index */ + #define SATA_SIRD		0xa4 /* SATA Indexed Register Data */ + #define SATA_SP			0xd0 /* Scratchpad */ +@@ -580,6 +585,7 @@ void mainboard_config_rcba(void); + #define D19IR		0x3168	/* 16bit */ + #define ACPIIRQEN	0x31e0	/* 32bit */ + #define OIC		0x31fe	/* 16bit */ ++#define PRSTS		0x3310	/* 32bit */ + #define PMSYNC_CONFIG	0x33c4	/* 32bit */ + #define PMSYNC_CONFIG2	0x33cc	/* 32bit */ + #define SOFT_RESET_CTRL 0x38f4 +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch b/config/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch new file mode 100644 index 00000000..6df828eb --- /dev/null +++ b/config/coreboot/haswell/patches/0008-nb-intel-haswell-Add-native-raminit-scaffolding.patch @@ -0,0 +1,407 @@ +From 46cdec8cbce15ca11ad9a49a3ee415a78f781997 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 00:26:10 +0200 +Subject: [PATCH 08/26] nb/intel/haswell: Add native raminit scaffolding + +Implement some scaffolding for Haswell native raminit, like bootmode +selection, handling of MRC cache and CPU detection. 
+ +Change-Id: Icd96649fa045ea7f0f32ae9bfe1e60498d93975b +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/raminit_main.c     | 104 ++++++++++ + .../haswell/native_raminit/raminit_native.c   | 189 +++++++++++++++++- + .../haswell/native_raminit/raminit_native.h   |  34 ++++ + 4 files changed, 322 insertions(+), 6 deletions(-) + create mode 100644 src/northbridge/intel/haswell/native_raminit/raminit_main.c + create mode 100644 src/northbridge/intel/haswell/native_raminit/raminit_native.h + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index 8cfb4fb33e..90af951c5a 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -1,3 +1,4 @@ + ## SPDX-License-Identifier: GPL-2.0-or-later +  ++romstage-y += raminit_main.c + romstage-y += raminit_native.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +new file mode 100644 +index 0000000000..9b42c25b40 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -0,0 +1,104 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <assert.h> ++#include <console/console.h> ++#include <cpu/intel/haswell/haswell.h> ++#include <delay.h> ++#include <device/pci_ops.h> ++#include <northbridge/intel/haswell/chip.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <northbridge/intel/haswell/raminit.h> ++#include <string.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++struct task_entry { ++	enum raminit_status (*task)(struct sysinfo *); ++	bool is_enabled; ++	const char *name; ++}; ++ ++static const struct task_entry cold_boot[] = { ++}; ++ ++/* Return a generic stepping value to make stepping checks simpler */ ++static enum 
generic_stepping get_stepping(const uint32_t cpuid) ++{ ++	switch (cpuid) { ++	case CPUID_HASWELL_A0: ++		die("Haswell stepping A0 is not supported\n"); ++	case CPUID_HASWELL_B0: ++	case CPUID_HASWELL_ULT_B0: ++	case CPUID_CRYSTALWELL_B0: ++		return STEPPING_B0; ++	case CPUID_HASWELL_C0: ++	case CPUID_HASWELL_ULT_C0: ++	case CPUID_CRYSTALWELL_C0: ++		return STEPPING_C0; ++	default: ++		/** TODO: Add Broadwell support someday **/ ++		die("Unknown CPUID 0x%x\n", cpuid); ++	} ++} ++ ++static void initialize_ctrl(struct sysinfo *ctrl) ++{ ++	const struct northbridge_intel_haswell_config *cfg = config_of_soc(); ++	const enum raminit_boot_mode bootmode = ctrl->bootmode; ++ ++	memset(ctrl, 0, sizeof(*ctrl)); ++ ++	ctrl->cpu = cpu_get_cpuid(); ++	ctrl->stepping = get_stepping(ctrl->cpu); ++	ctrl->dq_pins_interleaved = cfg->dq_pins_interleaved; ++	ctrl->bootmode = bootmode; ++} ++ ++static enum raminit_status try_raminit(struct sysinfo *ctrl) ++{ ++	const struct task_entry *const schedule = cold_boot; ++	const size_t length = ARRAY_SIZE(cold_boot); ++ ++	enum raminit_status status = RAMINIT_STATUS_UNSPECIFIED_ERROR; ++ ++	for (size_t i = 0; i < length; i++) { ++		const struct task_entry *const entry = &schedule[i]; ++		assert(entry); ++		assert(entry->name); ++		if (!entry->is_enabled) ++			continue; ++ ++		assert(entry->task); ++		printk(RAM_DEBUG, "\nExecuting raminit task %s\n", entry->name); ++		status = entry->task(ctrl); ++		printk(RAM_DEBUG, "\n"); ++		if (status) { ++			printk(BIOS_ERR, "raminit failed on step %s\n", entry->name); ++			break; ++		} ++	} ++ ++	return status; ++} ++ ++void raminit_main(const enum raminit_boot_mode bootmode) ++{ ++	/* ++	 * The mighty_ctrl struct. Will happily nuke the pre-RAM stack ++	 * if left unattended. Make it static and pass pointers to it. 
++	 */ ++	static struct sysinfo mighty_ctrl; ++ ++	mighty_ctrl.bootmode = bootmode; ++	initialize_ctrl(&mighty_ctrl); ++ ++	/** TODO: Try more than once **/ ++	enum raminit_status status = try_raminit(&mighty_ctrl); ++ ++	if (status != RAMINIT_STATUS_SUCCESS) ++		die("Memory initialization was met with utmost failure and misery\n"); ++ ++	/** TODO: Implement the required magic **/ ++	die("NATIVE RAMINIT: More Magic (tm) required.\n"); ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +index b6efb6b40d..0869db3902 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +@@ -1,13 +1,45 @@ + /* SPDX-License-Identifier: GPL-2.0-or-later */ +  ++#include <arch/cpu.h> ++#include <assert.h> ++#include <cbmem.h> ++#include <cf9_reset.h> + #include <console/console.h> ++#include <cpu/x86/msr.h> + #include <delay.h> ++#include <device/pci_ops.h> ++#include <mrc_cache.h> + #include <northbridge/intel/haswell/haswell.h> + #include <northbridge/intel/haswell/raminit.h> + #include <southbridge/intel/lynxpoint/me.h> + #include <southbridge/intel/lynxpoint/pch.h> + #include <types.h> +  ++#include "raminit_native.h" ++ ++static void wait_txt_clear(void) ++{ ++	const struct cpuid_result cpuid = cpuid_ext(1, 0); ++ ++	/* Check if TXT is supported */ ++	if (!(cpuid.ecx & BIT(6))) ++		return; ++ ++	/* Some TXT public bit */ ++	if (!(read32p(0xfed30010) & 1)) ++		return; ++ ++	/* Wait for TXT clear */ ++	do {} while (!(read8p(0xfed40000) & (1 << 7))); ++} ++ ++static enum raminit_boot_mode get_boot_mode(void) ++{ ++	const uint16_t pmcon_2 = pci_read_config16(PCH_LPC_DEV, GEN_PMCON_2); ++	const uint16_t bitmask = GEN_PMCON_2_DISB | GEN_PMCON_2_MEM_SR; ++	return (pmcon_2 & bitmask) == bitmask ? 
BOOTMODE_WARM : BOOTMODE_COLD; ++} ++ + static bool early_init_native(int s3resume) + { + 	printk(BIOS_DEBUG, "Starting native platform initialisation\n"); +@@ -24,6 +56,120 @@ static bool early_init_native(int s3resume) + 	return cpu_replaced; + } +  ++#define MRC_CACHE_VERSION 1 ++ ++struct mrc_data { ++	const void *buffer; ++	size_t buffer_len; ++}; ++ ++static void save_mrc_data(struct mrc_data *md) ++{ ++	mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION, md->buffer, md->buffer_len); ++} ++ ++static struct mrc_data prepare_mrc_cache(void) ++{ ++	struct mrc_data md = {0}; ++	md.buffer = mrc_cache_current_mmap_leak(MRC_TRAINING_DATA, ++						MRC_CACHE_VERSION, ++						&md.buffer_len); ++	return md; ++} ++ ++static const char *const bm_names[] = { ++	"BOOTMODE_COLD", ++	"BOOTMODE_WARM", ++	"BOOTMODE_S3", ++	"BOOTMODE_FAST", ++}; ++ ++static void clear_disb(void) ++{ ++	pci_and_config16(PCH_LPC_DEV, GEN_PMCON_2, ~GEN_PMCON_2_DISB); ++} ++ ++static void raminit_reset(void) ++{ ++	clear_disb(); ++	system_reset(); ++} ++ ++static enum raminit_boot_mode do_actual_raminit( ++	struct mrc_data *md, ++	const bool s3resume, ++	const bool cpu_replaced, ++	const enum raminit_boot_mode orig_bootmode) ++{ ++	enum raminit_boot_mode bootmode = orig_bootmode; ++ ++	bool save_data_valid = md->buffer && md->buffer_len == USHRT_MAX; /** TODO: sizeof() **/ ++ ++	if (s3resume) { ++		if (bootmode == BOOTMODE_COLD) { ++			printk(BIOS_EMERG, "Memory may not be in self-refresh for S3 resume\n"); ++			printk(BIOS_EMERG, "S3 resume and cold boot are mutually exclusive\n"); ++			raminit_reset(); ++		} ++		/* Only a true mad hatter would replace a CPU in S3 */ ++		if (cpu_replaced) { ++			printk(BIOS_EMERG, "Oh no, CPU was replaced during S3\n"); ++			/* ++			 * No reason to continue, memory consistency is most likely lost ++			 * and ME will probably request a reset through DID response too. 
++			 */ ++			/** TODO: Figure out why past self commented this out **/ ++			//raminit_reset(); ++		} ++		bootmode = BOOTMODE_S3; ++		if (!save_data_valid) { ++			printk(BIOS_EMERG, "No training data, S3 resume is impossible\n"); ++			/* Failed S3 resume, reset to come up cleanly */ ++			raminit_reset(); ++		} ++	} ++	if (!s3resume && cpu_replaced) { ++		printk(BIOS_NOTICE, "CPU was replaced, forcing a cold boot\n"); ++		/* ++		 * Looks like the ME will get angry if raminit takes too long. ++		 * It will report that the CPU has been replaced on next boot. ++		 * Try to continue anyway. This should not happen in most cases. ++		 */ ++		/** TODO: Figure out why past self commented this out **/ ++		//save_data_valid = false; ++	} ++	if (bootmode == BOOTMODE_COLD) { ++		/* If possible, promote to a fast boot */ ++		if (save_data_valid) ++			bootmode = BOOTMODE_FAST; ++ ++		clear_disb(); ++	} else if (bootmode == BOOTMODE_WARM) { ++		/* If a warm reset happened before raminit is done, force a cold boot */ ++		if (mchbar_read32(SSKPD) == 0 && mchbar_read32(SSKPD + 4) == 0) { ++			printk(BIOS_NOTICE, "Warm reset occurred early in cold boot\n"); ++			save_data_valid = false; ++		} ++		if (!save_data_valid) ++			bootmode = BOOTMODE_COLD; ++	} ++	assert(save_data_valid != (bootmode == BOOTMODE_COLD)); ++	if (save_data_valid) { ++		printk(BIOS_INFO, "Using cached memory parameters\n"); ++		die("RAMINIT: Fast boot is not yet implemented\n"); ++	} ++	printk(RAM_DEBUG, "Initial bootmode: %s\n", bm_names[orig_bootmode]); ++	printk(RAM_DEBUG, "Current bootmode: %s\n", bm_names[bootmode]); ++ ++	/* ++	 * And now, the actual memory initialization thing. 
++	 */ ++	printk(RAM_DEBUG, "\nStarting native raminit\n"); ++	raminit_main(bootmode); ++ ++	return bootmode; ++} ++ + void perform_raminit(const int s3resume) + { + 	/* +@@ -32,17 +178,48 @@ void perform_raminit(const int s3resume) + 	 */ + 	const bool cpu_replaced = early_init_native(s3resume); +  +-	(void)cpu_replaced; ++	wait_txt_clear(); ++	wrmsr(0x2e6, (msr_t) {.lo = 0, .hi = 0}); ++ ++	const enum raminit_boot_mode orig_bootmode = get_boot_mode(); ++ ++	struct mrc_data md = prepare_mrc_cache(); ++ ++	const enum raminit_boot_mode bootmode = ++			do_actual_raminit(&md, s3resume, cpu_replaced, orig_bootmode); ++ ++	/** TODO: report_memory_config **/ +  +-	/** TODO: Move after raminit */ + 	if (intel_early_me_uma_size() > 0) { +-		/** TODO: Update status once raminit is implemented **/ +-		uint8_t me_status = ME_INIT_STATUS_ERROR; ++		/* ++		 * The 'other' success value is to report loss of memory ++		 * consistency to ME if warm boot was downgraded to cold. ++		 */ ++		uint8_t me_status; ++		if (BOOTMODE_WARM == orig_bootmode && BOOTMODE_COLD == bootmode) ++			me_status = ME_INIT_STATUS_SUCCESS_OTHER; ++		else ++			me_status = ME_INIT_STATUS_SUCCESS; ++ ++		/** TODO: Remove this once raminit is implemented **/ ++		me_status = ME_INIT_STATUS_ERROR; + 		intel_early_me_init_done(me_status); + 	} +  ++	post_code(0x3b); ++ + 	intel_early_me_status(); +  +-	/** TODO: Implement the required magic **/ +-	die("NATIVE RAMINIT: More Magic (tm) required.\n"); ++	const bool cbmem_was_initted = !cbmem_recovery(s3resume); ++	if (s3resume && !cbmem_was_initted) { ++		/* Failed S3 resume, reset to come up cleanly */ ++		printk(BIOS_CRIT, "Failed to recover CBMEM in S3 resume.\n"); ++		system_reset(); ++	} ++ ++	/* Save training data on non-S3 resumes */ ++	if (!s3resume) ++		save_mrc_data(&md); ++ ++	/** TODO: setup_sdram_meminfo **/ + } +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h 
+new file mode 100644 +index 0000000000..885f0184f4 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -0,0 +1,34 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#ifndef HASWELL_RAMINIT_NATIVE_H ++#define HASWELL_RAMINIT_NATIVE_H ++ ++enum raminit_boot_mode { ++	BOOTMODE_COLD, ++	BOOTMODE_WARM, ++	BOOTMODE_S3, ++	BOOTMODE_FAST, ++}; ++ ++enum raminit_status { ++	RAMINIT_STATUS_SUCCESS = 0, ++	RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/ ++}; ++ ++enum generic_stepping { ++	STEPPING_A0 = 1, ++	STEPPING_B0 = 2, ++	STEPPING_C0 = 3, ++}; ++ ++struct sysinfo { ++	enum raminit_boot_mode bootmode; ++	enum generic_stepping stepping; ++	uint32_t cpu;		/* CPUID value */ ++ ++	bool dq_pins_interleaved; ++}; ++ ++void raminit_main(enum raminit_boot_mode bootmode); ++ ++#endif +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch b/config/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch new file mode 100644 index 00000000..07525d18 --- /dev/null +++ b/config/coreboot/haswell/patches/0009-nb-intel-haswell-nri-Only-do-CPU-replacement-check-o.patch @@ -0,0 +1,57 @@ +From 731216aef3129ae27ad5adc7266cb8a58090c9fc Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sun, 26 Jun 2022 10:32:12 +0200 +Subject: [PATCH 09/26] nb/intel/haswell/nri: Only do CPU replacement check on + cold boots + +CPU replacement check should only be done on cold boots. 
+ +Change-Id: I98efa105f4df755b23febe12dd7b356787847852 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/raminit_native.c   | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +index 0869db3902..bd9bc8e692 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +@@ -40,15 +40,14 @@ static enum raminit_boot_mode get_boot_mode(void) + 	return (pmcon_2 & bitmask) == bitmask ? BOOTMODE_WARM : BOOTMODE_COLD; + } +  +-static bool early_init_native(int s3resume) ++static bool early_init_native(enum raminit_boot_mode bootmode) + { + 	printk(BIOS_DEBUG, "Starting native platform initialisation\n"); +  + 	intel_early_me_init(); +-	/** TODO: CPU replacement check must be skipped in warm boots and S3 resumes **/ +-	const bool cpu_replaced = !s3resume && intel_early_me_cpu_replacement_check(); ++	bool cpu_replaced = bootmode == BOOTMODE_COLD && intel_early_me_cpu_replacement_check(); +  +-	early_pch_init_native(s3resume); ++	early_pch_init_native(bootmode == BOOTMODE_S3); +  + 	if (!CONFIG(INTEL_LYNXPOINT_LP)) + 		dmi_early_init(); +@@ -176,13 +175,13 @@ void perform_raminit(const int s3resume) + 	 * See, this function's name is a lie. There are more things to + 	 * do that memory initialisation, but they are relatively easy. + 	 */ +-	const bool cpu_replaced = early_init_native(s3resume); ++	const enum raminit_boot_mode orig_bootmode = get_boot_mode(); ++ ++	const bool cpu_replaced = early_init_native(s3resume ? 
BOOTMODE_S3 : orig_bootmode); +  + 	wait_txt_clear(); + 	wrmsr(0x2e6, (msr_t) {.lo = 0, .hi = 0}); +  +-	const enum raminit_boot_mode orig_bootmode = get_boot_mode(); +- + 	struct mrc_data md = prepare_mrc_cache(); +  + 	const enum raminit_boot_mode bootmode = +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch b/config/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch new file mode 100644 index 00000000..4c2a2670 --- /dev/null +++ b/config/coreboot/haswell/patches/0010-haswell-NRI-Collect-SPD-info.patch @@ -0,0 +1,344 @@ +From 354969af4361bcc7dc240ef5871d169728f7f0cc Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 13:48:53 +0200 +Subject: [PATCH 10/26] haswell NRI: Collect SPD info + +Collect SPD data from DIMMs and memory-down, and find the common +supported settings. + +Change-Id: I4e6a1408a638a463ecae37a447cfed1d6556e44a +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/raminit_main.c     |   1 + + .../haswell/native_raminit/raminit_native.h   |  57 +++++ + .../haswell/native_raminit/spd_bitmunching.c  | 206 ++++++++++++++++++ + 4 files changed, 265 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index 90af951c5a..ebf7abc6ec 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -2,3 +2,4 @@ +  + romstage-y += raminit_main.c + romstage-y += raminit_native.c ++romstage-y += spd_bitmunching.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 9b42c25b40..2d2cfa48bb 100644 +--- 
a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -20,6 +20,7 @@ struct task_entry { + }; +  + static const struct task_entry cold_boot[] = { ++	{ collect_spd_info,                                       true, "PROCSPD",    }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index 885f0184f4..1a0793947e 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -3,6 +3,15 @@ + #ifndef HASWELL_RAMINIT_NATIVE_H + #define HASWELL_RAMINIT_NATIVE_H +  ++#include <device/dram/ddr3.h> ++#include <northbridge/intel/haswell/haswell.h> ++ ++#define SPD_LEN 256 ++ ++/* 8 data lanes + 1 ECC lane */ ++#define NUM_LANES		9 ++#define NUM_LANES_NO_ECC	8 ++ + enum raminit_boot_mode { + 	BOOTMODE_COLD, + 	BOOTMODE_WARM, +@@ -12,6 +21,8 @@ enum raminit_boot_mode { +  + enum raminit_status { + 	RAMINIT_STATUS_SUCCESS = 0, ++	RAMINIT_STATUS_NO_MEMORY_INSTALLED, ++	RAMINIT_STATUS_UNSUPPORTED_MEMORY, + 	RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/ + }; +  +@@ -21,14 +32,60 @@ enum generic_stepping { + 	STEPPING_C0 = 3, + }; +  ++struct raminit_dimm_info { ++	spd_raw_data raw_spd; ++	struct dimm_attr_ddr3_st data; ++	uint8_t spd_addr; ++	bool valid; ++}; ++ + struct sysinfo { + 	enum raminit_boot_mode bootmode; + 	enum generic_stepping stepping; + 	uint32_t cpu;		/* CPUID value */ +  + 	bool dq_pins_interleaved; ++ ++	/** TODO: ECC support untested **/ ++	bool is_ecc; ++ ++	/** ++	 * FIXME: LPDDR support is incomplete. The largest chunks are missing, ++	 * but some LPDDR-specific variations in algorithms have been handled. ++	 * LPDDR-specific functions have stubs which will halt upon execution. 
++	 */ ++	bool lpddr; ++ ++	struct raminit_dimm_info dimms[NUM_CHANNELS][NUM_SLOTS]; ++	union dimm_flags_ddr3_st flags; ++	uint16_t cas_supported; ++ ++	/* Except for tCK, everything is eventually stored in DCLKs */ ++	uint32_t tCK; ++	uint32_t tAA;			/* Also known as tCL */ ++	uint32_t tWR; ++	uint32_t tRCD; ++	uint32_t tRRD; ++	uint32_t tRP; ++	uint32_t tRAS; ++	uint32_t tRC; ++	uint32_t tRFC; ++	uint32_t tWTR; ++	uint32_t tRTP; ++	uint32_t tFAW; ++	uint32_t tCWL; ++	uint32_t tCMD; ++ ++	uint8_t lanes;			/* 8 or 9 */ ++	uint8_t chanmap; ++	uint8_t dpc[NUM_CHANNELS];	/* DIMMs per channel */ ++	uint8_t rankmap[NUM_CHANNELS]; ++	uint8_t rank_mirrored[NUM_CHANNELS]; ++	uint32_t channel_size_mb[NUM_CHANNELS]; + }; +  + void raminit_main(enum raminit_boot_mode bootmode); +  ++enum raminit_status collect_spd_info(struct sysinfo *ctrl); ++ + #endif +diff --git a/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c +new file mode 100644 +index 0000000000..dbe02c72d0 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c +@@ -0,0 +1,206 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <cbfs.h> ++#include <commonlib/clamp.h> ++#include <console/console.h> ++#include <device/dram/ddr3.h> ++#include <device/smbus_host.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <northbridge/intel/haswell/raminit.h> ++#include <string.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++static const uint8_t *get_spd_data_from_cbfs(struct spd_info *spdi) ++{ ++	if (!CONFIG(HAVE_SPD_IN_CBFS)) ++		return NULL; ++ ++	printk(RAM_DEBUG, "SPD index %u\n", spdi->spd_index); ++ ++	size_t spd_file_len; ++	uint8_t *spd_file = cbfs_map("spd.bin", &spd_file_len); ++ ++	if (!spd_file) { ++		printk(BIOS_ERR, "SPD data not found in CBFS\n"); ++		return NULL; ++	} ++ ++	if (spd_file_len < ((spdi->spd_index + 1) * SPD_LEN)) { ++		printk(BIOS_ERR, "SPD index 
override to 0 - old hardware?\n"); ++		spdi->spd_index = 0; ++	} ++ ++	if (spd_file_len < SPD_LEN) { ++		printk(BIOS_ERR, "Invalid SPD data in CBFS\n"); ++		return NULL; ++	} ++ ++	return spd_file + (spdi->spd_index * SPD_LEN); ++} ++ ++static void get_spd_for_dimm(struct raminit_dimm_info *const dimm, const uint8_t *cbfs_spd) ++{ ++	if (dimm->spd_addr == SPD_MEMORY_DOWN) { ++		if (cbfs_spd) { ++			memcpy(dimm->raw_spd, cbfs_spd, SPD_LEN); ++			dimm->valid = true; ++			printk(RAM_DEBUG, "memory-down\n"); ++			return; ++		} else { ++			printk(RAM_DEBUG, "memory-down but no CBFS SPD data, ignoring\n"); ++			return; ++		} ++	} ++	printk(RAM_DEBUG, "slotted "); ++	const uint8_t spd_mem_type = smbus_read_byte(dimm->spd_addr, SPD_MEMORY_TYPE); ++	if (spd_mem_type != SPD_MEMORY_TYPE_SDRAM_DDR3) { ++		printk(RAM_DEBUG, "and not DDR3, ignoring\n"); ++		return; ++	} ++	printk(RAM_DEBUG, "and DDR3\n"); ++	if (i2c_eeprom_read(dimm->spd_addr, 0, SPD_LEN, dimm->raw_spd) != SPD_LEN) { ++		printk(BIOS_WARNING, "I2C block read failed, trying SMBus byte reads\n"); ++		for (uint32_t i = 0; i < SPD_LEN; i++) ++			dimm->raw_spd[i] = smbus_read_byte(dimm->spd_addr, i); ++	} ++	dimm->valid = true; ++} ++ ++static void get_spd_data(struct sysinfo *ctrl) ++{ ++	struct spd_info spdi = {0}; ++	mb_get_spd_map(&spdi); ++	const uint8_t *cbfs_spd = get_spd_data_from_cbfs(&spdi); ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) { ++			struct raminit_dimm_info *const dimm = &ctrl->dimms[channel][slot]; ++			dimm->spd_addr = spdi.addresses[channel + channel + slot]; ++			if (!dimm->spd_addr) ++				continue; ++ ++			printk(RAM_DEBUG, "CH%uS%u is ", channel, slot); ++			get_spd_for_dimm(dimm, cbfs_spd); ++		} ++	} ++} ++ ++static void decode_spd(struct raminit_dimm_info *const dimm) ++{ ++	/** TODO: Hook up somewhere, and handle lack of XMP data **/ ++	const bool enable_xmp = false; ++	memset(&dimm->data, 0, sizeof(dimm->data)); ++	
if (enable_xmp) ++		spd_xmp_decode_ddr3(&dimm->data, dimm->raw_spd, DDR3_XMP_PROFILE_1); ++	else ++		spd_decode_ddr3(&dimm->data, dimm->raw_spd); ++ ++	if (CONFIG(DEBUG_RAM_SETUP)) ++		dram_print_spd_ddr3(&dimm->data); ++} ++ ++static enum raminit_status find_common_spd_parameters(struct sysinfo *ctrl) ++{ ++	ctrl->cas_supported = 0xffff; ++	ctrl->flags.raw = 0xffffffff; ++ ++	ctrl->tCK  = 0; ++	ctrl->tAA  = 0; ++	ctrl->tWR  = 0; ++	ctrl->tRCD = 0; ++	ctrl->tRRD = 0; ++	ctrl->tRP  = 0; ++	ctrl->tRAS = 0; ++	ctrl->tRC  = 0; ++	ctrl->tRFC = 0; ++	ctrl->tWTR = 0; ++	ctrl->tRTP = 0; ++	ctrl->tFAW = 0; ++	ctrl->tCWL = 0; ++	ctrl->tCMD = 0; ++	ctrl->chanmap = 0; ++ ++	bool yes_ecc = false; ++	bool not_ecc = false; ++ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		ctrl->dpc[channel] = 0; ++		ctrl->rankmap[channel] = 0; ++		ctrl->rank_mirrored[channel] = 0; ++		ctrl->channel_size_mb[channel] = 0; ++		for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) { ++			struct raminit_dimm_info *const dimm = &ctrl->dimms[channel][slot]; ++			if (!dimm->valid) ++				continue; ++ ++			printk(RAM_DEBUG, "\nCH%uS%u SPD:\n", channel, slot); ++			decode_spd(dimm); ++ ++			ctrl->chanmap |= BIT(channel); ++			ctrl->dpc[channel]++; ++			ctrl->channel_size_mb[channel] += dimm->data.size_mb; ++ ++			/* The first rank of a populated slot is always present */ ++			const uint8_t rank = slot + slot; ++			assert(dimm->data.ranks); ++			ctrl->rankmap[channel] |= (BIT(dimm->data.ranks) - 1) << rank; ++ ++			if (dimm->data.flags.pins_mirrored) ++				ctrl->rank_mirrored[channel] |= BIT(rank + 1); ++ ++			/* Find common settings */ ++			ctrl->cas_supported &= dimm->data.cas_supported; ++			ctrl->flags.raw &= dimm->data.flags.raw; ++			ctrl->tCK  = MAX(ctrl->tCK,  dimm->data.tCK); ++			ctrl->tAA  = MAX(ctrl->tAA,  dimm->data.tAA); ++			ctrl->tWR  = MAX(ctrl->tWR,  dimm->data.tWR); ++			ctrl->tRCD = MAX(ctrl->tRCD, dimm->data.tRCD); ++			ctrl->tRRD = MAX(ctrl->tRRD, dimm->data.tRRD); ++		
	ctrl->tRP  = MAX(ctrl->tRP,  dimm->data.tRP); ++			ctrl->tRAS = MAX(ctrl->tRAS, dimm->data.tRAS); ++			ctrl->tRC  = MAX(ctrl->tRC,  dimm->data.tRC); ++			ctrl->tRFC = MAX(ctrl->tRFC, dimm->data.tRFC); ++			ctrl->tWTR = MAX(ctrl->tWTR, dimm->data.tWTR); ++			ctrl->tRTP = MAX(ctrl->tRTP, dimm->data.tRTP); ++			ctrl->tFAW = MAX(ctrl->tFAW, dimm->data.tFAW); ++			ctrl->tCWL = MAX(ctrl->tCWL, dimm->data.tCWL); ++			ctrl->tCMD = MAX(ctrl->tCMD, dimm->data.tCMD); ++ ++			yes_ecc |=  dimm->data.flags.is_ecc; ++			not_ecc |= !dimm->data.flags.is_ecc; ++		} ++	} ++ ++	if (!ctrl->chanmap) { ++		printk(BIOS_ERR, "No DIMMs were found\n"); ++		return RAMINIT_STATUS_NO_MEMORY_INSTALLED; ++	} ++	if (!ctrl->cas_supported) { ++		printk(BIOS_ERR, "Could not resolve common CAS latency\n"); ++		return RAMINIT_STATUS_UNSUPPORTED_MEMORY; ++	} ++	/** TODO: Properly handle ECC support and ECC forced **/ ++	if (yes_ecc && not_ecc) { ++		/** TODO: Test if the ECC DIMMs can be operated as non-ECC DIMMs **/ ++		printk(BIOS_ERR, "Both ECC and non-ECC DIMMs present, this is unsupported\n"); ++		return RAMINIT_STATUS_UNSUPPORTED_MEMORY; ++	} ++	if (yes_ecc) ++		ctrl->lanes = NUM_LANES; ++	else ++		ctrl->lanes = NUM_LANES_NO_ECC; ++ ++	ctrl->is_ecc = yes_ecc; ++ ++	/** TODO: Complete LPDDR support **/ ++	ctrl->lpddr = false; ++ ++	return RAMINIT_STATUS_SUCCESS; ++} ++ ++enum raminit_status collect_spd_info(struct sysinfo *ctrl) ++{ ++	get_spd_data(ctrl); ++	return find_common_spd_parameters(ctrl); ++} +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch b/config/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch new file mode 100644 index 00000000..1fec2e38 --- /dev/null +++ b/config/coreboot/haswell/patches/0011-haswell-NRI-Initialise-MPLL.patch @@ -0,0 +1,346 @@ +From 77a89d55ab7a715dc20c34a6edacaaf781b56087 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 14:36:10 +0200 +Subject: [PATCH 11/26] 
haswell NRI: Initialise MPLL + +Add code to initialise the MPLL (Memory PLL). The procedure is similar +to the one for Sandy/Ivy Bridge, but it is not worth factoring out. + +Change-Id: I978c352de68f6d8cecc76f4ae3c12daaf4be9ed6 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   2 + + .../intel/haswell/native_raminit/init_mpll.c  | 210 ++++++++++++++++++ + .../haswell/native_raminit/io_comp_control.c  |  22 ++ + .../haswell/native_raminit/raminit_main.c     |   1 + + .../haswell/native_raminit/raminit_native.h   |  11 + + .../intel/haswell/registers/mchbar.h          |   3 + + 6 files changed, 249 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/init_mpll.c + create mode 100644 src/northbridge/intel/haswell/native_raminit/io_comp_control.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index ebf7abc6ec..c125d84f0b 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -1,5 +1,7 @@ + ## SPDX-License-Identifier: GPL-2.0-or-later +  ++romstage-y += init_mpll.c ++romstage-y += io_comp_control.c + romstage-y += raminit_main.c + romstage-y += raminit_native.c + romstage-y += spd_bitmunching.c +diff --git a/src/northbridge/intel/haswell/native_raminit/init_mpll.c b/src/northbridge/intel/haswell/native_raminit/init_mpll.c +new file mode 100644 +index 0000000000..2faa183724 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/init_mpll.c +@@ -0,0 +1,210 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <commonlib/clamp.h> ++#include <console/console.h> ++#include <delay.h> ++#include <device/pci_ops.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++static uint32_t get_mem_multiplier(const struct sysinfo *ctrl) ++{ ++	const 
uint32_t mult = NS2MHZ_DIV256 / (ctrl->tCK * ctrl->base_freq); ++ ++	if (ctrl->base_freq == 100) ++		return clamp_u32(7, mult, 12); ++ ++	if (ctrl->base_freq == 133) ++		return clamp_u32(3, mult, 10); ++ ++	die("Unsupported base frequency\n"); ++} ++ ++static void normalize_tck(struct sysinfo *ctrl, const bool pll_ref100) ++{ ++	/** TODO: Haswell supports up to DDR3-2600 **/ ++	if (ctrl->tCK <= TCK_1200MHZ) { ++		ctrl->tCK = TCK_1200MHZ; ++		ctrl->base_freq = 133; ++		ctrl->mem_clock_mhz = 1200; ++ ++	} else if (ctrl->tCK <= TCK_1100MHZ) { ++		ctrl->tCK = TCK_1100MHZ; ++		ctrl->base_freq = 100; ++		ctrl->mem_clock_mhz = 1100; ++ ++	} else if (ctrl->tCK <= TCK_1066MHZ) { ++		ctrl->tCK = TCK_1066MHZ; ++		ctrl->base_freq = 133; ++		ctrl->mem_clock_mhz = 1066; ++ ++	} else if (ctrl->tCK <= TCK_1000MHZ) { ++		ctrl->tCK = TCK_1000MHZ; ++		ctrl->base_freq = 100; ++		ctrl->mem_clock_mhz = 1000; ++ ++	} else if (ctrl->tCK <= TCK_933MHZ) { ++		ctrl->tCK = TCK_933MHZ; ++		ctrl->base_freq = 133; ++		ctrl->mem_clock_mhz = 933; ++ ++	} else if (ctrl->tCK <= TCK_900MHZ) { ++		ctrl->tCK = TCK_900MHZ; ++		ctrl->base_freq = 100; ++		ctrl->mem_clock_mhz = 900; ++ ++	} else if (ctrl->tCK <= TCK_800MHZ) { ++		ctrl->tCK = TCK_800MHZ; ++		ctrl->base_freq = 133; ++		ctrl->mem_clock_mhz = 800; ++ ++	} else if (ctrl->tCK <= TCK_700MHZ) { ++		ctrl->tCK = TCK_700MHZ; ++		ctrl->base_freq = 100; ++		ctrl->mem_clock_mhz = 700; ++ ++	} else if (ctrl->tCK <= TCK_666MHZ) { ++		ctrl->tCK = TCK_666MHZ; ++		ctrl->base_freq = 133; ++		ctrl->mem_clock_mhz = 666; ++ ++	} else if (ctrl->tCK <= TCK_533MHZ) { ++		ctrl->tCK = TCK_533MHZ; ++		ctrl->base_freq = 133; ++		ctrl->mem_clock_mhz = 533; ++ ++	} else if (ctrl->tCK <= TCK_400MHZ) { ++		ctrl->tCK = TCK_400MHZ; ++		ctrl->base_freq = 133; ++		ctrl->mem_clock_mhz = 400; ++ ++	} else { ++		ctrl->tCK = 0; ++		ctrl->base_freq = 1; ++		ctrl->mem_clock_mhz = 0; ++		return; ++	} ++	if (!pll_ref100 && ctrl->base_freq == 100) { ++		/* Skip unsupported frequency */ 
++		ctrl->tCK++; ++		normalize_tck(ctrl, pll_ref100); ++	} ++} ++ ++#define MIN_CAS	4 ++#define MAX_CAS	24 ++ ++static uint8_t find_compatible_cas(struct sysinfo *ctrl) ++{ ++	printk(RAM_DEBUG, "With tCK %u, try CAS: ", ctrl->tCK); ++	const uint8_t cas_lower = MAX(MIN_CAS, DIV_ROUND_UP(ctrl->tAA, ctrl->tCK)); ++	const uint8_t cas_upper = MIN(MAX_CAS, 19); /* JEDEC MR0 limit */ ++ ++	if (!(ctrl->cas_supported >> (cas_lower - MIN_CAS))) { ++		printk(RAM_DEBUG, "DIMMs do not support CAS >= %u\n", cas_lower); ++		ctrl->tCK++; ++		return 0; ++	} ++	for (uint8_t cas = cas_lower; cas <= cas_upper; cas++) { ++		printk(RAM_DEBUG, "%u ", cas); ++		if (ctrl->cas_supported & BIT(cas - MIN_CAS)) { ++			printk(RAM_DEBUG, "OK\n"); ++			return cas; ++		} ++	} ++	return 0; ++} ++ ++static enum raminit_status find_cas_tck(struct sysinfo *ctrl) ++{ ++	/** TODO: Honor all possible PLL_REF100_CFG values **/ ++	uint8_t pll_ref100 = (pci_read_config32(HOST_BRIDGE, CAPID0_B) >> 21) & 0x7; ++	printk(RAM_DEBUG, "PLL_REF100_CFG value: 0x%x\n", pll_ref100); ++	printk(RAM_DEBUG, "100MHz reference clock support: %s\n", pll_ref100 ? "yes" : "no"); ++ ++	uint8_t selected_cas; ++	while (true) { ++		/* Round tCK up so that it is a multiple of either 133 or 100 MHz */ ++		normalize_tck(ctrl, pll_ref100); ++		if (!ctrl->tCK) { ++			printk(BIOS_ERR, "Couldn't find compatible clock / CAS settings\n"); ++			return RAMINIT_STATUS_MPLL_INIT_FAILURE; ++		} ++		selected_cas = find_compatible_cas(ctrl); ++		if (selected_cas) ++			break; ++ ++		ctrl->tCK++; ++	} ++	printk(BIOS_DEBUG, "Found compatible clock / CAS settings\n"); ++	printk(BIOS_DEBUG, "Selected DRAM frequency: %u MHz\n", NS2MHZ_DIV256 / ctrl->tCK); ++	printk(BIOS_DEBUG, "Selected CAS latency   : %uT\n", selected_cas); ++	ctrl->multiplier = get_mem_multiplier(ctrl); ++	return RAMINIT_STATUS_SUCCESS; ++} ++ ++enum raminit_status initialise_mpll(struct sysinfo *ctrl) ++{ ++	if (ctrl->tCK > TCK_400MHZ) { ++		printk(BIOS_ERR, "tCK is too slow. 
Increasing to 400 MHz as last resort\n"); ++		ctrl->tCK = TCK_400MHZ; ++	} ++	while (true) { ++		if (!ctrl->qclkps) { ++			const enum raminit_status status = find_cas_tck(ctrl); ++			if (status) ++				return status; ++		} ++ ++		/* ++		 * Unlike previous generations, Haswell's MPLL won't shut down if the ++		 * requested frequency isn't supported. But we cannot reinitialize it. ++		 * Another different thing: MPLL registers are 4-bit instead of 8-bit. ++		 */ ++ ++		/** FIXME: Obtain current clock frequency if we want to skip this **/ ++		//if (mchbar_read32(MC_BIOS_DATA) != 0) ++		//	break; ++ ++		uint32_t mc_bios_req = ctrl->multiplier; ++		if (ctrl->base_freq == 100) { ++			/* Use 100 MHz reference clock */ ++			mc_bios_req |= BIT(4); ++		} ++		mc_bios_req |= BIT(31); ++		printk(RAM_DEBUG, "MC_BIOS_REQ = 0x%08x\n", mc_bios_req); ++		printk(BIOS_DEBUG, "MPLL busy... "); ++		mchbar_write32(MC_BIOS_REQ, mc_bios_req); ++ ++		for (unsigned int i = 0; i <= 5000; i++) { ++			if (!(mchbar_read32(MC_BIOS_REQ) & BIT(31))) { ++				printk(BIOS_DEBUG, "done in %u us\n", i); ++				break; ++			} ++			udelay(1); ++		} ++		if (mchbar_read32(MC_BIOS_REQ) & BIT(31)) ++			printk(BIOS_DEBUG, "did not lock\n"); ++ ++		/* Verify locked frequency */ ++		const uint32_t mc_bios_data = mchbar_read32(MC_BIOS_DATA); ++		printk(RAM_DEBUG, "MC_BIOS_DATA = 0x%08x\n", mc_bios_data); ++		if ((mc_bios_data & 0xf) >= ctrl->multiplier) ++			break; ++ ++		printk(BIOS_DEBUG, "Retrying at a lower frequency\n\n"); ++		ctrl->tCK++; ++	} ++	if (!ctrl->mem_clock_mhz) { ++		printk(BIOS_ERR, "Could not program MPLL frequency\n"); ++		return RAMINIT_STATUS_MPLL_INIT_FAILURE; ++	} ++	printk(BIOS_DEBUG, "MPLL frequency is set to: %u MHz ", ctrl->mem_clock_mhz); ++	ctrl->mem_clock_fs = 1000000000 / ctrl->mem_clock_mhz; ++	printk(BIOS_DEBUG, "(period: %u femtoseconds)\n", ctrl->mem_clock_fs); ++	ctrl->qclkps = ctrl->mem_clock_fs / 2000; ++	printk(BIOS_DEBUG, "Quadrature clock period: %u picoseconds\n", 
ctrl->qclkps); ++	return wait_for_first_rcomp(); ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/io_comp_control.c b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c +new file mode 100644 +index 0000000000..7e96c08938 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c +@@ -0,0 +1,22 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <commonlib/clamp.h> ++#include <console/console.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <timer.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++enum raminit_status wait_for_first_rcomp(void) ++{ ++	struct stopwatch timer; ++	stopwatch_init_msecs_expire(&timer, 2000); ++	do { ++		if (mchbar_read32(RCOMP_TIMER) & BIT(16)) ++			return RAMINIT_STATUS_SUCCESS; ++ ++	} while (!stopwatch_expired(&timer)); ++	printk(BIOS_ERR, "Timed out waiting for RCOMP to complete\n"); ++	return RAMINIT_STATUS_POLL_TIMEOUT; ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 2d2cfa48bb..09545422c0 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -21,6 +21,7 @@ struct task_entry { +  + static const struct task_entry cold_boot[] = { + 	{ collect_spd_info,                                       true, "PROCSPD",    }, ++	{ initialise_mpll,                                        true, "INITMPLL",   }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index 1a0793947e..a54581abc7 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -23,6 +23,8 @@ enum raminit_status { + 	RAMINIT_STATUS_SUCCESS = 0, + 	
RAMINIT_STATUS_NO_MEMORY_INSTALLED, + 	RAMINIT_STATUS_UNSUPPORTED_MEMORY, ++	RAMINIT_STATUS_MPLL_INIT_FAILURE, ++	RAMINIT_STATUS_POLL_TIMEOUT, + 	RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/ + }; +  +@@ -82,10 +84,19 @@ struct sysinfo { + 	uint8_t rankmap[NUM_CHANNELS]; + 	uint8_t rank_mirrored[NUM_CHANNELS]; + 	uint32_t channel_size_mb[NUM_CHANNELS]; ++ ++	uint8_t base_freq;		/* Memory base frequency, either 100 or 133 MHz */ ++	uint32_t multiplier; ++	uint32_t mem_clock_mhz; ++	uint32_t mem_clock_fs;		/* Memory clock period in femtoseconds */ ++	uint32_t qclkps;		/* Quadrature clock period in picoseconds */ + }; +  + void raminit_main(enum raminit_boot_mode bootmode); +  + enum raminit_status collect_spd_info(struct sysinfo *ctrl); ++enum raminit_status initialise_mpll(struct sysinfo *ctrl); ++ ++enum raminit_status wait_for_first_rcomp(void); +  + #endif +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 5610e7089a..45f8174995 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -13,6 +13,8 @@ + #define MC_INIT_STATE_G		0x5030 + #define MRC_REVISION		0x5034 /* MRC Revision */ +  ++#define RCOMP_TIMER		0x5084 ++ + #define MC_LOCK			0x50fc /* Memory Controller Lock register */ +  + #define GFXVTBAR		0x5400 /* Base address for IGD */ +@@ -61,6 +63,7 @@ +  + #define BIOS_RESET_CPL		0x5da8 /* 8-bit */ +  ++#define MC_BIOS_REQ		0x5e00 /* Memory frequency request register */ + #define MC_BIOS_DATA		0x5e04 /* Miscellaneous information for BIOS */ + #define SAPMCTL			0x5f00 +  +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch b/config/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch new file mode 100644 index 00000000..e38f8e57 --- /dev/null +++ 
b/config/coreboot/haswell/patches/0012-haswell-NRI-Post-process-selected-timings.patch @@ -0,0 +1,249 @@ +From faabed9ca8974b2e7192c55b59a9d28d75e72df6 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 16:29:55 +0200 +Subject: [PATCH 12/26] haswell NRI: Post-process selected timings + +Once the MPLL has been initialised, convert the timings from the SPD to +be in DCLKs, which is what the hardware expects. In addition, calculate +the values for tREFI and tXP. + +Change-Id: Id02caf858f75b9e08016762b3aefda282b274386 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/lookup_timings.c   |  62 +++++++++++ + .../haswell/native_raminit/raminit_main.c     |   1 + + .../haswell/native_raminit/raminit_native.h   |   8 ++ + .../haswell/native_raminit/spd_bitmunching.c  | 100 ++++++++++++++++++ + 5 files changed, 172 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/lookup_timings.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index c125d84f0b..2769e0bbb4 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -1,5 +1,6 @@ + ## SPDX-License-Identifier: GPL-2.0-or-later +  ++romstage-y += lookup_timings.c + romstage-y += init_mpll.c + romstage-y += io_comp_control.c + romstage-y += raminit_main.c +diff --git a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c +new file mode 100644 +index 0000000000..038686c844 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c +@@ -0,0 +1,62 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <commonlib/clamp.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++struct timing_lookup { ++	
uint32_t clock; ++	uint32_t value; ++}; ++ ++static uint32_t lookup_timing( ++	const uint32_t mem_clock_mhz, ++	const struct timing_lookup *const lookup, ++	const size_t length) ++{ ++	/* Fall back to the last index */ ++	size_t i; ++	for (i = 0; i < length - 1; i++) { ++		/* Account for imprecise frequency values */ ++		if ((mem_clock_mhz - 5) <= lookup[i].clock) ++			break; ++	} ++	return lookup[i].value; ++} ++ ++static const uint32_t fmax = UINT32_MAX; ++ ++uint8_t get_tCWL(const uint32_t mem_clock_mhz) ++{ ++	const struct timing_lookup lut[] = { ++		{  400,  5 }, ++		{  533,  6 }, ++		{  666,  7 }, ++		{  800,  8 }, ++		{  933,  9 }, ++		{ 1066, 10 }, ++		{ 1200, 11 }, ++		{ fmax, 12 }, ++	}; ++	return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut)); ++} ++ ++/* tREFI = 7800 ns * DDR MHz */ ++uint32_t get_tREFI(const uint32_t mem_clock_mhz) ++{ ++	return (mem_clock_mhz * 7800) / 1000; ++} ++ ++uint32_t get_tXP(const uint32_t mem_clock_mhz) ++{ ++	const struct timing_lookup lut[] = { ++		{  400,  3 }, ++		{  666,  4 }, ++		{  800,  5 }, ++		{  933,  6 }, ++		{ 1066,  7 }, ++		{ fmax,  8 }, ++	}; ++	return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut)); ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 09545422c0..5f2be980d4 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -22,6 +22,7 @@ struct task_entry { + static const struct task_entry cold_boot[] = { + 	{ collect_spd_info,                                       true, "PROCSPD",    }, + 	{ initialise_mpll,                                        true, "INITMPLL",   }, ++	{ convert_timings,                                        true, "CONVTIM",    }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h 
b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index a54581abc7..01e5ed1bd6 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -78,6 +78,9 @@ struct sysinfo { + 	uint32_t tCWL; + 	uint32_t tCMD; +  ++	uint32_t tREFI; ++	uint32_t tXP; ++ + 	uint8_t lanes;			/* 8 or 9 */ + 	uint8_t chanmap; + 	uint8_t dpc[NUM_CHANNELS];	/* DIMMs per channel */ +@@ -96,7 +99,12 @@ void raminit_main(enum raminit_boot_mode bootmode); +  + enum raminit_status collect_spd_info(struct sysinfo *ctrl); + enum raminit_status initialise_mpll(struct sysinfo *ctrl); ++enum raminit_status convert_timings(struct sysinfo *ctrl); +  + enum raminit_status wait_for_first_rcomp(void); +  ++uint8_t get_tCWL(uint32_t mem_clock_mhz); ++uint32_t get_tREFI(uint32_t mem_clock_mhz); ++uint32_t get_tXP(uint32_t mem_clock_mhz); ++ + #endif +diff --git a/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c +index dbe02c72d0..becbea0725 100644 +--- a/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c ++++ b/src/northbridge/intel/haswell/native_raminit/spd_bitmunching.c +@@ -204,3 +204,103 @@ enum raminit_status collect_spd_info(struct sysinfo *ctrl) + 	get_spd_data(ctrl); + 	return find_common_spd_parameters(ctrl); + } ++ ++#define MIN_CWL		5 ++#define MAX_CWL		12 ++ ++/* Except for tCK, hardware expects all timing values in DCLKs, not nanoseconds */ ++enum raminit_status convert_timings(struct sysinfo *ctrl) ++{ ++	/* ++	 * Obtain all required timing values, in DCLKs. 
++	 */ ++ ++	/* Convert primary timings from nanoseconds to DCLKs */ ++	ctrl->tAA  = DIV_ROUND_UP(ctrl->tAA,  ctrl->tCK); ++	ctrl->tWR  = DIV_ROUND_UP(ctrl->tWR,  ctrl->tCK); ++	ctrl->tRCD = DIV_ROUND_UP(ctrl->tRCD, ctrl->tCK); ++	ctrl->tRRD = DIV_ROUND_UP(ctrl->tRRD, ctrl->tCK); ++	ctrl->tRP  = DIV_ROUND_UP(ctrl->tRP,  ctrl->tCK); ++	ctrl->tRAS = DIV_ROUND_UP(ctrl->tRAS, ctrl->tCK); ++	ctrl->tRC  = DIV_ROUND_UP(ctrl->tRC,  ctrl->tCK); ++	ctrl->tRFC = DIV_ROUND_UP(ctrl->tRFC, ctrl->tCK); ++	ctrl->tWTR = DIV_ROUND_UP(ctrl->tWTR, ctrl->tCK); ++	ctrl->tRTP = DIV_ROUND_UP(ctrl->tRTP, ctrl->tCK); ++	ctrl->tFAW = DIV_ROUND_UP(ctrl->tFAW, ctrl->tCK); ++	ctrl->tCWL = DIV_ROUND_UP(ctrl->tCWL, ctrl->tCK); ++	ctrl->tCMD = DIV_ROUND_UP(ctrl->tCMD, ctrl->tCK); ++ ++	/* Constrain primary timings to hardware limits */ ++	/** TODO: complain when clamping? **/ ++	ctrl->tAA  = clamp_u32(4,  ctrl->tAA,  24); ++	ctrl->tWR  = clamp_u32(5,  ctrl->tWR,  16); ++	ctrl->tRCD = clamp_u32(4,  ctrl->tRCD, 20); ++	ctrl->tRRD = clamp_u32(4,  ctrl->tRRD, 65535); ++	ctrl->tRP  = clamp_u32(4,  ctrl->tRP,  15); ++	ctrl->tRAS = clamp_u32(10, ctrl->tRAS, 40); ++	ctrl->tRC  = clamp_u32(1,  ctrl->tRC,  4095); ++	ctrl->tRFC = clamp_u32(1,  ctrl->tRFC, 511); ++	ctrl->tWTR = clamp_u32(4,  ctrl->tWTR, 10); ++	ctrl->tRTP = clamp_u32(4,  ctrl->tRTP, 15); ++	ctrl->tFAW = clamp_u32(10, ctrl->tFAW, 54); ++ ++	/** TODO: Honor tREFI from XMP **/ ++	ctrl->tREFI = get_tREFI(ctrl->mem_clock_mhz); ++	ctrl->tXP   =   get_tXP(ctrl->mem_clock_mhz); ++ ++	/* ++	 * Check some values, and adjust them if necessary. 
++	 */ ++ ++	/* If tWR cannot be written into DDR3 MR0, adjust it */ ++	switch (ctrl->tWR) { ++	case  9: ++	case 11: ++	case 13: ++	case 15: ++		ctrl->tWR++; ++	} ++ ++	/* If tCWL is not supported or unspecified, look up a reasonable default */ ++	if (ctrl->tCWL < MIN_CWL || ctrl->tCWL > MAX_CWL) ++		ctrl->tCWL = get_tCWL(ctrl->mem_clock_mhz); ++ ++	/* This is needed to support ODT properly on 2DPC */ ++	if (ctrl->tAA - ctrl->tCWL > 4) ++		ctrl->tCWL = ctrl->tAA - 4; ++ ++	/* If tCMD is invalid, use a guesstimate default */ ++	if (!ctrl->tCMD) { ++		ctrl->tCMD = MAX(ctrl->dpc[0], ctrl->dpc[1]); ++		printk(RAM_DEBUG, "tCMD was zero, picking a guesstimate value\n"); ++	} ++	ctrl->tCMD = clamp_u32(1, ctrl->tCMD, 3); ++ ++	/* ++	 * Print final timings. ++	 */ ++ ++	/* tCK is special */ ++	printk(BIOS_DEBUG, "Selected tCK          : %u ns\n", ctrl->tCK / 256); ++ ++	/* Primary timings */ ++	printk(BIOS_DEBUG, "Selected tAA          : %uT\n", ctrl->tAA); ++	printk(BIOS_DEBUG, "Selected tWR          : %uT\n", ctrl->tWR); ++	printk(BIOS_DEBUG, "Selected tRCD         : %uT\n", ctrl->tRCD); ++	printk(BIOS_DEBUG, "Selected tRRD         : %uT\n", ctrl->tRRD); ++	printk(BIOS_DEBUG, "Selected tRP          : %uT\n", ctrl->tRP); ++	printk(BIOS_DEBUG, "Selected tRAS         : %uT\n", ctrl->tRAS); ++	printk(BIOS_DEBUG, "Selected tRC          : %uT\n", ctrl->tRC); ++	printk(BIOS_DEBUG, "Selected tRFC         : %uT\n", ctrl->tRFC); ++	printk(BIOS_DEBUG, "Selected tWTR         : %uT\n", ctrl->tWTR); ++	printk(BIOS_DEBUG, "Selected tRTP         : %uT\n", ctrl->tRTP); ++	printk(BIOS_DEBUG, "Selected tFAW         : %uT\n", ctrl->tFAW); ++	printk(BIOS_DEBUG, "Selected tCWL         : %uT\n", ctrl->tCWL); ++	printk(BIOS_DEBUG, "Selected tCMD         : %uT\n", ctrl->tCMD); ++ ++	/* Derived timings */ ++	printk(BIOS_DEBUG, "Selected tREFI        : %uT\n", ctrl->tREFI); ++	printk(BIOS_DEBUG, "Selected tXP          : %uT\n", ctrl->tXP); ++ ++	return RAMINIT_STATUS_SUCCESS; ++} +--  +2.39.2 + 
diff --git a/config/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch b/config/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch new file mode 100644 index 00000000..b1c33328 --- /dev/null +++ b/config/coreboot/haswell/patches/0013-haswell-NRI-Configure-initial-MC-settings.patch @@ -0,0 +1,1593 @@ +From 1b0b17d85256193de825fa7ff0e04767c818f2fc Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 17:22:07 +0200 +Subject: [PATCH 13/26] haswell NRI: Configure initial MC settings + +Program initial memory controller settings. Many of these values will be +adjusted later during training. + +Change-Id: If33846b51cb1bab5d0458fe626e13afb1bdc900e +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   2 + + .../haswell/native_raminit/configure_mc.c     | 822 ++++++++++++++++++ + .../haswell/native_raminit/raminit_main.c     |   2 + + .../haswell/native_raminit/raminit_native.h   | 101 +++ + .../haswell/native_raminit/reg_structs.h      | 405 +++++++++ + .../haswell/native_raminit/timings_refresh.c  |  13 + + .../intel/haswell/registers/mchbar.h          |  94 ++ + 7 files changed, 1439 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/configure_mc.c + create mode 100644 src/northbridge/intel/haswell/native_raminit/reg_structs.h + create mode 100644 src/northbridge/intel/haswell/native_raminit/timings_refresh.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index 2769e0bbb4..fc55277a65 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -1,8 +1,10 @@ + ## SPDX-License-Identifier: GPL-2.0-or-later +  ++romstage-y += configure_mc.c + romstage-y += lookup_timings.c + romstage-y += init_mpll.c + romstage-y += io_comp_control.c + 
romstage-y += raminit_main.c + romstage-y += raminit_native.c + romstage-y += spd_bitmunching.c ++romstage-y += timings_refresh.c +diff --git a/src/northbridge/intel/haswell/native_raminit/configure_mc.c b/src/northbridge/intel/haswell/native_raminit/configure_mc.c +new file mode 100644 +index 0000000000..2a667b075b +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/configure_mc.c +@@ -0,0 +1,822 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <assert.h> ++#include <commonlib/clamp.h> ++#include <console/console.h> ++#include <delay.h> ++#include <lib.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <string.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++static void program_misc_control(struct sysinfo *ctrl) ++{ ++	if (!is_hsw_ult()) ++		return; ++ ++	const union ddr_scram_misc_control_reg ddr_scram_misc_ctrl = { ++		.ddr_no_ch_interleave = !ctrl->dq_pins_interleaved, ++		.lpddr_mode           = ctrl->lpddr, ++		.cke_mapping_ch0      = ctrl->lpddr ? ctrl->lpddr_cke_rank_map[0] : 0, ++		.cke_mapping_ch1      = ctrl->lpddr ? 
ctrl->lpddr_cke_rank_map[1] : 0, ++	}; ++	mchbar_write32(DDR_SCRAM_MISC_CONTROL, ddr_scram_misc_ctrl.raw); ++} ++ ++static void program_mrc_revision(void) ++{ ++	mchbar_write32(MRC_REVISION, 0x01090000);	/* MRC 1.9.0 Build 0 */ ++} ++ ++static void program_ranks_used(struct sysinfo *ctrl) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		mchbar_write8(MC_INIT_STATE_ch(channel), ctrl->rankmap[channel]); ++		if (!does_ch_exist(ctrl, channel)) { ++			mchbar_write32(DDR_CLK_ch_RANKS_USED(channel), 0); ++			mchbar_write32(DDR_CTL_ch_CTL_RANKS_USED(channel), 0); ++			mchbar_write32(DDR_CKE_ch_CTL_RANKS_USED(channel), 0); ++			continue; ++		} ++		uint32_t clk_ranks_used = ctrl->rankmap[channel]; ++		if (ctrl->lpddr) { ++			/* With LPDDR, the clock usage goes by group instead */ ++			clk_ranks_used = 0; ++			for (uint8_t group = 0; group < NUM_GROUPS; group++) { ++				if (ctrl->dq_byte_map[channel][CT_ITERATION_CLOCK][group]) ++					clk_ranks_used |= BIT(group); ++			} ++		} ++		mchbar_write32(DDR_CLK_ch_RANKS_USED(channel), clk_ranks_used); ++ ++		uint32_t ctl_ranks_used = ctrl->rankmap[channel]; ++		if (is_hsw_ult()) { ++			/* Set ODT disable bits */ ++			/** TODO: May need to do this after JEDEC reset/init **/ ++			if (ctrl->lpddr && ctrl->lpddr_dram_odt) ++				ctl_ranks_used |= 2 << 4;	/* ODT is used on rank 0 */ ++			else ++				ctl_ranks_used |= 3 << 4; ++		} ++		mchbar_write32(DDR_CTL_ch_CTL_RANKS_USED(channel), ctl_ranks_used); ++ ++		uint32_t cke_ranks_used = ctrl->rankmap[channel]; ++		if (ctrl->lpddr) { ++			/* Use CKE-to-rank mapping for LPDDR */ ++			const uint8_t cke_rank_map = ctrl->lpddr_cke_rank_map[channel]; ++			cke_ranks_used = 0; ++			for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++				/* ULT only has 2 ranks per channel */ ++				if (rank >= 2) ++					break; ++ ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				for (uint8_t cke = 0; cke < 4; cke++) { ++					if (rank == ((cke_rank_map >> cke) & 1)) ++			
			cke_ranks_used |= BIT(cke); ++				} ++			} ++		} ++		mchbar_write32(DDR_CKE_ch_CTL_RANKS_USED(channel), cke_ranks_used); ++	} ++} ++ ++static const uint8_t rxb_trad[2][5][4] = { ++	{	/* Vdd low */ ++		/*  1067 MHz,     1333 MHz,     1600 MHz,     1867 MHz,     2133 MHz, */ ++		{4, 3, 3, 2}, {4, 4, 3, 2}, {5, 4, 3, 3}, {5, 4, 4, 3}, {5, 4, 4, 3}, ++	}, ++	{	/* Vdd hi */ ++		/*  1067 MHz,     1333 MHz,     1600 MHz,     1867 MHz,     2133 MHz, */ ++		{4, 3, 3, 2}, {4, 4, 3, 2}, {5, 4, 3, 3}, {5, 4, 4, 3}, {4, 4, 3, 3}, ++	}, ++}; ++ ++static const uint8_t rxb_ultx[2][3][4] = { ++	{	/* Vdd low */ ++		/*  1067 MHz,     1333 MHz,     1600 MHz, */ ++		{5, 6, 6, 5}, {5, 6, 6, 5}, {4, 6, 6, 6}, ++	}, ++	{	/* Vdd hi */ ++		/*  1067 MHz,     1333 MHz,     1600 MHz, */ ++		{7, 6, 6, 5}, {7, 6, 6, 5}, {7, 6, 6, 6}, ++	}, ++}; ++ ++uint8_t get_rx_bias(const struct sysinfo *ctrl) ++{ ++	const bool is_ult = is_hsw_ult(); ++	const bool vddhi  = ctrl->vdd_mv > 1350; ++	const uint8_t max_rxf = is_ult ? ARRAY_SIZE(rxb_ultx[0]) : ARRAY_SIZE(rxb_trad[0]); ++	const uint8_t ref_clk = ctrl->base_freq == 133 ? 
4 : 6; ++	const uint8_t rx_f    = clamp_s8(0, ctrl->multiplier - ref_clk, max_rxf - 1); ++	const uint8_t rx_cb   = mchbar_read32(DDR_CLK_CB_STATUS) & 0x3; ++	if (is_ult) ++		return rxb_ultx[vddhi][rx_f][rx_cb]; ++	else ++		return rxb_trad[vddhi][rx_f][rx_cb]; ++} ++ ++static void program_ddr_data(struct sysinfo *ctrl, const bool dis_odt_static, const bool vddhi) ++{ ++	const bool is_ult = is_hsw_ult(); ++ ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (!does_rank_exist(ctrl, rank)) ++			continue; ++ ++		const union ddr_data_rx_train_rank_reg rx_train = { ++			.rcven = 64, ++			.dqs_p = 32, ++			.dqs_n = 32, ++		}; ++		mchbar_write32(DDR_DATA_RX_TRAIN_RANK(rank), rx_train.raw); ++		mchbar_write32(DDR_DATA_RX_PER_BIT_RANK(rank), 0x88888888); ++ ++		const union ddr_data_tx_train_rank_reg tx_train = { ++			.tx_eq     = TXEQFULLDRV | 11, ++			.dq_delay  = 96, ++			.dqs_delay = 64, ++		}; ++		mchbar_write32(DDR_DATA_TX_TRAIN_RANK(rank), tx_train.raw); ++		mchbar_write32(DDR_DATA_TX_PER_BIT_RANK(rank), 0x88888888); ++ ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				ctrl->tx_dq[channel][rank][byte] = tx_train.dq_delay; ++				ctrl->txdqs[channel][rank][byte] = tx_train.dqs_delay; ++				ctrl->tx_eq[channel][rank][byte] = tx_train.tx_eq; ++ ++				ctrl->rcven[channel][rank][byte]  = rx_train.rcven; ++				ctrl->rxdqsp[channel][rank][byte] = rx_train.dqs_p; ++				ctrl->rxdqsn[channel][rank][byte] = rx_train.dqs_n; ++				ctrl->rx_eq[channel][rank][byte]  = rx_train.rx_eq; ++			} ++		} ++	} ++	mchbar_write32(DDR_DATA_TX_XTALK, 0); ++	mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0x88888888); ++	mchbar_write32(DDR_DATA_OFFSET_TRAIN, 0); ++	mchbar_write32(DDR_DATA_OFFSET_COMP, 0); ++ ++	const union ddr_data_control_0_reg data_control_0 = { ++		.internal_clocks_on = !is_ult, ++		.data_vccddq_hi     = vddhi, ++		.disable_odt_static = dis_odt_static, ++		.lpddr_mode         = ctrl->lpddr, ++		
.odt_samp_extend_en = ctrl->lpddr, ++		.early_rleak_en     = ctrl->lpddr && ctrl->stepping >= STEPPING_C0, ++	}; ++	mchbar_write32(DDR_DATA_CONTROL_0, data_control_0.raw); ++ ++	const union ddr_data_control_1_reg data_control_1 = { ++		.dll_mask             = 1, ++		.rx_bias_ctl          = get_rx_bias(ctrl), ++		.odt_delay            = -2, ++		.odt_duration         = 7, ++		.sense_amp_delay      = -2, ++		.sense_amp_duration   = 7, ++	}; ++	mchbar_write32(DDR_DATA_CONTROL_1, data_control_1.raw); ++ ++	clear_data_offset_train_all(ctrl); ++ ++	/* Stagger byte turn-on to reduce dI/dT */ ++	const uint8_t byte_stagger[] = { 0, 4, 1, 5, 2, 6, 3, 7, 8 }; ++	const uint8_t latency = 2 * ctrl->tAA - 6; ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		union ddr_data_control_2_reg data_control_2 = { ++			.raw = 0, ++		}; ++		if (is_ult) { ++			data_control_2.rx_dqs_amp_offset = 8; ++			data_control_2.rx_clk_stg_num    = 0x1f; ++			data_control_2.leaker_comp       = ctrl->lpddr ? 3 : 0; ++		} ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++			const uint8_t stg = latency * byte_stagger[byte] / ctrl->lanes; ++			data_control_2.rx_stagger_ctl = stg & 0x1f; ++			mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw); ++			ctrl->data_offset_comp[channel][byte] = 0; ++			ctrl->dq_control_1[channel][byte] = data_control_1.raw; ++			ctrl->dq_control_2[channel][byte] = data_control_2.raw; ++		} ++		ctrl->dq_control_0[channel] = data_control_0.raw; ++	} ++} ++ ++static void program_vsshi_control(struct sysinfo *ctrl, const uint16_t vsshi_mv) ++{ ++	const uint32_t vsshi_control_reg = is_hsw_ult() ? 
0x366c : 0x306c; ++	const union ddr_comp_vsshi_control_reg ddr_vsshi_control = { ++		.vsshi_target    = (vsshi_mv * 192) / ctrl->vdd_mv - 20, ++		.hi_bw_divider   = 1, ++		.lo_bw_divider   = 1, ++		.bw_error        = 2, ++		.panic_driver_en = 1, ++		.panic_voltage   = 24 / 8, /* Voltage in 8mV steps */ ++		.gain_boost      = 1, ++	}; ++	mchbar_write32(vsshi_control_reg, ddr_vsshi_control.raw); ++	mchbar_write32(DDR_COMP_VSSHI_CONTROL, ddr_vsshi_control.raw); ++} ++ ++static void calc_vt_slope_code(const uint16_t slope, uint8_t *best_a, uint8_t *best_b) ++{ ++	const int16_t coding[] = {0, -125, -62, -31, 250, 125, 62, 31}; ++	*best_a = 0; ++	*best_b = 0; ++	int16_t	best_err = slope; ++	for (uint8_t b = 0; b < ARRAY_SIZE(coding); b++) { ++		for (uint8_t a = b; a < ARRAY_SIZE(coding); a++) { ++			int16_t	error = slope - (coding[a] + coding[b]); ++			if (error < 0) ++				error = -error; ++ ++			if (error < best_err) { ++				best_err = error; ++				*best_a = a; ++				*best_b = b; ++			} ++		} ++	} ++} ++ ++static void program_dimm_vref(struct sysinfo *ctrl, const uint16_t vccio_mv, const bool vddhi) ++{ ++	const bool is_ult = is_hsw_ult(); ++ ++	/* Static values for ULT */ ++	uint8_t vt_slope_a = 4; ++	uint8_t vt_slope_b = 0; ++	if (!is_ult) { ++		/* On non-ULT, compute best slope code */ ++		const uint16_t vt_slope = 1500 * vccio_mv / ctrl->vdd_mv - 1000; ++		calc_vt_slope_code(vt_slope, &vt_slope_a, &vt_slope_b); ++	} ++	const union ddr_data_vref_control_reg ddr_vref_control = { ++		.hi_bw_divider  = is_ult ? 0 : 3, ++		.lo_bw_divider  = 3, ++		.sample_divider = is_ult ? 1 : 3, ++		.slow_bw_error  = 1, ++		.hi_bw_enable   = 1, ++		.vt_slope_b     = vt_slope_b, ++		.vt_slope_a     = vt_slope_a, ++		.vt_offset      = 0, ++	}; ++	mchbar_write32(is_ult ? 
0xf68 : 0xf6c, ddr_vref_control.raw); /* Use CH1 byte 7 */ ++ ++	const union ddr_data_vref_adjust_reg ddr_vref_adjust = { ++		.en_dimm_vref_ca  = 1, ++		.en_dimm_vref_ch0 = 1, ++		.en_dimm_vref_ch1 = 1, ++		.vccddq_hi_qnnn_h = vddhi, ++		.hi_z_timer_ctrl  = 3, ++	}; ++	ctrl->dimm_vref = ddr_vref_adjust; ++	mchbar_write32(DDR_DATA_VREF_ADJUST, ddr_vref_adjust.raw); ++} ++ ++static uint32_t pi_code(const uint32_t code) ++{ ++	return code << 21 | code << 14 | code << 7 | code << 0; ++} ++ ++static void program_ddr_ca(struct sysinfo *ctrl, const bool vddhi) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		const union ddr_clk_controls_reg ddr_clk_controls = { ++			.dll_mask   = 1, ++			.vccddq_hi  = vddhi, ++			.lpddr_mode = ctrl->lpddr, ++		}; ++		mchbar_write32(DDR_CLK_ch_CONTROLS(channel), ddr_clk_controls.raw); ++ ++		const union ddr_cmd_controls_reg ddr_cmd_controls = { ++			.dll_mask         = 1, ++			.vccddq_hi        = vddhi, ++			.lpddr_mode       = ctrl->lpddr, ++			.early_weak_drive = 3, ++			.cmd_tx_eq        = 1, ++		}; ++		mchbar_write32(DDR_CMD_ch_CONTROLS(channel), ddr_cmd_controls.raw); ++ ++		const union ddr_cke_ctl_controls_reg ddr_cke_controls = { ++			.dll_mask         = 1, ++			.vccddq_hi        = vddhi, ++			.lpddr_mode       = ctrl->lpddr, ++			.early_weak_drive = 3, ++			.cmd_tx_eq        = 1, ++			.ctl_tx_eq        = 1, ++			.ctl_sr_drv       = 2, ++		}; ++		mchbar_write32(DDR_CKE_ch_CTL_CONTROLS(channel), ddr_cke_controls.raw); ++ ++		const union ddr_cke_ctl_controls_reg ddr_ctl_controls = { ++			.dll_mask       = 1, ++			.vccddq_hi      = vddhi, ++			.lpddr_mode     = ctrl->lpddr, ++			.ctl_tx_eq      = 1, ++			.ctl_sr_drv     = 2, ++			.la_drv_en_ovrd = 1,	/* Must be set on ULT */ ++		}; ++		mchbar_write32(DDR_CTL_ch_CTL_CONTROLS(channel), ddr_ctl_controls.raw); ++ ++		const uint8_t cmd_pi = ctrl->lpddr ? 
96 : 64; ++		mchbar_write32(DDR_CMD_ch_PI_CODING(channel), pi_code(cmd_pi)); ++		mchbar_write32(DDR_CKE_ch_CMD_PI_CODING(channel), pi_code(cmd_pi)); ++		mchbar_write32(DDR_CKE_CTL_ch_CTL_PI_CODING(channel), pi_code(64)); ++		mchbar_write32(DDR_CLK_ch_PI_CODING(channel), pi_code(64)); ++ ++		mchbar_write32(DDR_CMD_ch_COMP_OFFSET(channel), 0); ++		mchbar_write32(DDR_CLK_ch_COMP_OFFSET(channel), 0); ++		mchbar_write32(DDR_CKE_CTL_ch_CTL_COMP_OFFSET(channel), 0); ++ ++		for (uint8_t group = 0; group < NUM_GROUPS; group++) { ++			ctrl->cke_cmd_pi_code[channel][group] = cmd_pi; ++			ctrl->cmd_north_pi_code[channel][group] = cmd_pi; ++			ctrl->cmd_south_pi_code[channel][group] = cmd_pi; ++		} ++		for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++			ctrl->clk_pi_code[channel][rank] = 64; ++			ctrl->ctl_pi_code[channel][rank] = 64; ++		} ++	} ++} ++ ++enum { ++	RCOMP_RD_ODT = 0, ++	RCOMP_WR_DS_DQ, ++	RCOMP_WR_DS_CMD, ++	RCOMP_WR_DS_CTL, ++	RCOMP_WR_DS_CLK, ++	RCOMP_MAX_CODES, ++}; ++ ++struct rcomp_info { ++	uint8_t resistor; ++	uint8_t sz_steps; ++	uint8_t target_r; ++	int8_t result; ++}; ++ ++static void program_rcomp_vref(struct sysinfo *ctrl, const bool dis_odt_static) ++{ ++	const bool is_ult = is_hsw_ult(); ++	/* ++	 * +-------------------------------+ ++	 * | Rcomp resistor values in ohms | ++	 * +-----------+------+------+-----+ ++	 * | Ball name | Trad | ULTX | Use | ++	 * +-----------+------+------+-----+ ++	 * | SM_RCOMP0 | 100  | 200  | CMD | ++	 * | SM_RCOMP1 |  75  | 120  | DQ  | ++	 * | SM_RCOMP2 | 100  | 100  | ODT | ++	 * +-----------+------+------+-----+ ++	 */ ++	struct rcomp_info rcomp_cfg[RCOMP_MAX_CODES] = { ++		[RCOMP_RD_ODT] = { ++			.resistor = 50, ++			.sz_steps = 96, ++			.target_r = 50, ++		}, ++		[RCOMP_WR_DS_DQ] = { ++			.resistor = 25, ++			.sz_steps = 64, ++			.target_r = 33, ++		}, ++		[RCOMP_WR_DS_CMD] = { ++			.resistor = 20, ++			.sz_steps = 64, ++			.target_r = 20, ++		}, ++		[RCOMP_WR_DS_CTL] = { ++			.resistor = 20, ++			
.sz_steps = 64, ++			.target_r = 20, ++		}, ++		[RCOMP_WR_DS_CLK] = { ++			.resistor = 25, ++			.sz_steps = 64, ++			.target_r = 29, ++		}, ++	}; ++	if (is_ult) { ++		rcomp_cfg[RCOMP_WR_DS_DQ].resistor = 40; ++		rcomp_cfg[RCOMP_WR_DS_DQ].target_r = 40; ++		rcomp_cfg[RCOMP_WR_DS_CLK].resistor = 40; ++	} else if (ctrl->dpc[0] == 2 || ctrl->dpc[1] == 2) { ++		rcomp_cfg[RCOMP_RD_ODT].target_r = 60; ++	} ++	for (uint8_t i = 0; i < RCOMP_MAX_CODES; i++) { ++		struct rcomp_info *const r = &rcomp_cfg[i]; ++		const int32_t div = 2 * (r->resistor + r->target_r); ++		assert(div); ++		const int32_t vref = (r->sz_steps * (r->resistor - r->target_r)) / div; ++ ++		/* DqOdt is 5 bits wide, the other Rcomp targets are 4 bits wide */ ++		const int8_t comp_limit = i == RCOMP_RD_ODT ? 16 : 8; ++		r->result = clamp_s32(-comp_limit, vref, comp_limit - 1); ++	} ++	const union ddr_comp_ctl_0_reg ddr_comp_ctl_0 = { ++		.disable_odt_static = dis_odt_static, ++		.dq_drv_vref        = rcomp_cfg[RCOMP_WR_DS_DQ].result, ++		.dq_odt_vref        = rcomp_cfg[RCOMP_RD_ODT].result, ++		.cmd_drv_vref       = rcomp_cfg[RCOMP_WR_DS_CMD].result, ++		.ctl_drv_vref       = rcomp_cfg[RCOMP_WR_DS_CTL].result, ++		.clk_drv_vref       = rcomp_cfg[RCOMP_WR_DS_CLK].result, ++	}; ++	ctrl->comp_ctl_0 = ddr_comp_ctl_0; ++	mchbar_write32(DDR_COMP_CTL_0, ctrl->comp_ctl_0.raw); ++} ++ ++enum { ++	SCOMP_DQ = 0, ++	SCOMP_CMD, ++	SCOMP_CTL, ++	SCOMP_CLK, ++	SCOMP_MAX_CODES, ++}; ++ ++static void program_slew_rates(struct sysinfo *ctrl, const bool vddhi) ++{ ++	const uint8_t min_cycle_delay[SCOMP_MAX_CODES] = { 46, 70, 70, 46 }; ++	uint8_t buffer_stage_delay_ps[SCOMP_MAX_CODES] = { 59, 53, 53, 53 }; ++	uint16_t comp_slew_rate_codes[SCOMP_MAX_CODES]; ++ ++	/* CMD Slew Rate = 1.8 for 2N */ ++	if (ctrl->tCMD == 2) ++		buffer_stage_delay_ps[SCOMP_CMD] = 89; ++ ++	/* CMD Slew Rate = 4 V/ns for double-pumped CMD bus */ ++	if (ctrl->lpddr) ++		buffer_stage_delay_ps[SCOMP_CMD] = 63; ++ ++	for (uint8_t i = 0; i < 
SCOMP_MAX_CODES; i++) { ++		uint16_t stages = DIV_ROUND_CLOSEST(ctrl->qclkps, buffer_stage_delay_ps[i]); ++		if (stages < 5) ++			stages = 5; ++ ++		bool dll_pc = buffer_stage_delay_ps[i] < min_cycle_delay[i] || stages > 16; ++ ++		/* Lock DLL... */ ++		if (dll_pc) ++			comp_slew_rate_codes[i] = stages / 2 - 1;        /* to a phase */ ++		else ++			comp_slew_rate_codes[i] = (stages - 1) | BIT(4); /* to a cycle */ ++	} ++	union ddr_comp_ctl_1_reg ddr_comp_ctl_1 = { ++		.dq_scomp       = comp_slew_rate_codes[SCOMP_DQ], ++		.cmd_scomp      = comp_slew_rate_codes[SCOMP_CMD], ++		.ctl_scomp      = comp_slew_rate_codes[SCOMP_CTL], ++		.clk_scomp      = comp_slew_rate_codes[SCOMP_CLK], ++		.vccddq_hi      = vddhi, ++	}; ++	ctrl->comp_ctl_1 = ddr_comp_ctl_1; ++	mchbar_write32(DDR_COMP_CTL_1, ctrl->comp_ctl_1.raw); ++} ++ ++static uint32_t ln_x100(const uint32_t input_x100) ++{ ++	uint32_t val = input_x100; ++	uint32_t ret = 0; ++	while (val > 271) { ++		val = (val * 1000) / 2718; ++		ret += 100; ++	} ++	return ret + (-16 * val * val + 11578 * val - 978860) / 10000; ++} ++ ++static uint32_t compute_vsshi_vref(struct sysinfo *ctrl, const uint32_t vsshi_tgt, bool up) ++{ ++	const uint32_t delta = 15; ++	const uint32_t c_die_vsshi = 2000; ++	const uint32_t r_cmd_ref = 100 * 10; ++	const uint32_t offset = up ? 64 : 0; ++	const uint32_t ln_vsshi = ln_x100((100 * vsshi_tgt) / (vsshi_tgt - delta)); ++	const uint32_t r_target = (ctrl->qclkps * 2000) / (c_die_vsshi * ln_vsshi); ++	const uint32_t r_dividend = 128 * (up ? 
r_cmd_ref : r_target); ++	return r_dividend / (r_cmd_ref + r_target) - offset; ++} ++ ++static void program_vsshi(struct sysinfo *ctrl, const uint16_t vccio_mv, const uint16_t vsshi) ++{ ++	const uint16_t vsshi_down = vsshi + 24; /* Panic threshold of 24 mV */ ++	const uint16_t vsshi_up = vccio_mv - vsshi_down; ++	const union ddr_comp_vsshi_reg ddr_comp_vsshi = { ++		.panic_drv_down_vref = compute_vsshi_vref(ctrl, vsshi_down, false), ++		.panic_drv_up_vref   = compute_vsshi_vref(ctrl, vsshi_up, true), ++		.vt_offset           = 128 * 450 / vccio_mv / 2, ++		.vt_slope_a          = 4, ++	}; ++	mchbar_write32(DDR_COMP_VSSHI, ddr_comp_vsshi.raw); ++} ++ ++static void program_misc(struct sysinfo *ctrl) ++{ ++	ctrl->misc_control_0.raw = mchbar_read32(DDR_SCRAM_MISC_CONTROL); ++	ctrl->misc_control_0.weaklock_latency = 12; ++	ctrl->misc_control_0.wl_sleep_cycles  =  5; ++	ctrl->misc_control_0.wl_wake_cycles   =  2; ++	mchbar_write32(DDR_SCRAM_MISC_CONTROL, ctrl->misc_control_0.raw); ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		/* Keep scrambling disabled for training */ ++		mchbar_write32(DDR_SCRAMBLE_ch(channel), 0); ++	} ++} ++ ++/* Very weird, application-specific function */ ++static void override_comp(uint32_t value, uint32_t width, uint32_t shift, uint32_t offset) ++{ ++	const uint32_t mask = (1 << width) - 1; ++	uint32_t reg32 = mchbar_read32(offset); ++	reg32 &= ~(mask << shift); ++	reg32 |= (value << shift); ++	mchbar_write32(offset, reg32); ++} ++ ++static void program_ls_comp(struct sysinfo *ctrl) ++{ ++	/* Disable periodic COMP */ ++	const union pcu_comp_reg m_comp = { ++		.comp_disable  = 1, ++		.comp_interval = COMP_INT, ++		.comp_force    = 1, ++	}; ++	mchbar_write32(M_COMP, m_comp.raw); ++	udelay(10); ++ ++	/* Override level shifter compensation */ ++	const uint32_t ls_comp = 2; ++	override_comp(ls_comp, 3, 28, DDR_DATA_RCOMP_DATA_1); ++	override_comp(ls_comp, 3, 24, DDR_CMD_COMP); ++	override_comp(ls_comp, 3, 24, DDR_CKE_CTL_COMP); 
++	override_comp(ls_comp, 3, 23, DDR_CLK_COMP); ++	override_comp(ls_comp, 3, 28, DDR_COMP_DATA_COMP_1); ++	override_comp(ls_comp, 3, 24, DDR_COMP_CMD_COMP); ++	override_comp(ls_comp, 4, 24, DDR_COMP_CTL_COMP); ++	override_comp(ls_comp, 4, 23, DDR_COMP_CLK_COMP); ++	override_comp(ls_comp, 3, 24, DDR_COMP_OVERRIDE); ++ ++	/* Manually update the COMP values */ ++	union ddr_scram_misc_control_reg ddr_scram_misc_ctrl = ctrl->misc_control_0; ++	ddr_scram_misc_ctrl.force_comp_update = 1; ++	mchbar_write32(DDR_SCRAM_MISC_CONTROL, ddr_scram_misc_ctrl.raw); ++ ++	/* Use a fixed offset between ODT Up/Dn */ ++	const union ddr_comp_data_comp_1_reg data_comp_1 = { ++		.raw = mchbar_read32(DDR_COMP_DATA_COMP_1), ++	}; ++	const uint32_t odt_offset = data_comp_1.rcomp_odt_down - data_comp_1.rcomp_odt_up; ++	ctrl->comp_ctl_0.odt_up_down_off  = odt_offset; ++	ctrl->comp_ctl_0.fixed_odt_offset = 1; ++	mchbar_write32(DDR_COMP_CTL_0, ctrl->comp_ctl_0.raw); ++} ++ ++/** TODO: Deduplicate PCODE stuff, it's already implemented in CPU code **/ ++static bool pcode_ready(void) ++{ ++	const unsigned int delay_step = 10; ++	for (unsigned int i = 0; i < 1000; i += delay_step) { ++		if (!(mchbar_read32(BIOS_MAILBOX_INTERFACE) & MAILBOX_RUN_BUSY)) ++			return true; ++ ++		udelay(delay_step); ++	}; ++	return false; ++} ++ ++static uint32_t pcode_mailbox_read(const uint32_t command) ++{ ++	if (!pcode_ready()) { ++		printk(BIOS_ERR, "PCODE: mailbox timeout on wait ready\n"); ++		return 0; ++	} ++	mchbar_write32(BIOS_MAILBOX_INTERFACE, command | MAILBOX_RUN_BUSY); ++	if (!pcode_ready()) { ++		printk(BIOS_ERR, "PCODE: mailbox timeout on completion\n"); ++		return 0; ++	} ++	return mchbar_read32(BIOS_MAILBOX_DATA); ++} ++ ++static int pcode_mailbox_write(const uint32_t command, const uint32_t data) ++{ ++	if (!pcode_ready()) { ++		printk(BIOS_ERR, "PCODE: mailbox timeout on wait ready\n"); ++		return -1; ++	} ++	mchbar_write32(BIOS_MAILBOX_DATA, data); ++	mchbar_write32(BIOS_MAILBOX_INTERFACE, command | 
MAILBOX_RUN_BUSY); ++	if (!pcode_ready()) { ++		printk(BIOS_ERR, "PCODE: mailbox timeout on completion\n"); ++		return -1; ++	} ++	return 0; ++} ++ ++static void enable_2x_refresh(struct sysinfo *ctrl) ++{ ++	if (!CONFIG(ENABLE_DDR_2X_REFRESH)) ++		return; ++ ++	printk(BIOS_DEBUG, "Enabling 2x Refresh\n"); ++	const bool asr = ctrl->flags.asr; ++	const bool lpddr = ctrl->lpddr; ++ ++	/* Mutually exclusive */ ++	assert(!asr || !lpddr); ++	if (!asr) { ++		uint32_t reg32 = pcode_mailbox_read(MAILBOX_BIOS_CMD_READ_DDR_2X_REFRESH); ++		if (!(reg32 & BIT(31))) {	/** TODO: What to do if this is locked? **/ ++			reg32 |= BIT(0);	/* Enable 2x refresh */ ++			reg32 |= BIT(31);	/* Lock */ ++ ++			if (lpddr) ++				reg32 |= 4 << 1;	/* LPDDR MR4 1/2 tREFI */ ++ ++			if (pcode_mailbox_write(MAILBOX_BIOS_CMD_WRITE_DDR_2X_REFRESH, reg32)) ++				printk(BIOS_ERR, "Could not enable Mailbox 2x Refresh\n"); ++		} ++		if (!lpddr) ++			return; ++	} ++	assert(asr || lpddr); ++	uint16_t refi_reduction = 50; ++	if (lpddr) { ++		refi_reduction = 97; ++		mchbar_clrbits32(PCU_DDR_PTM_CTL, 1 << 7); /* DISABLE_DRAM_TS */ ++	} ++	/** TODO: Remember why this is only done on cold boots **/ ++	if (ctrl->bootmode == BOOTMODE_COLD) { ++		ctrl->tREFI *= refi_reduction; ++		ctrl->tREFI /= 100; ++	} ++} ++ ++static void set_pcu_ddr_voltage(const uint16_t vdd_mv) ++{ ++	/** TODO: Handle other voltages? **/ ++	uint32_t pcu_ddr_voltage; ++	switch (vdd_mv) { ++	case 1200: ++		pcu_ddr_voltage = 3; ++		break; ++	case 1350: ++		pcu_ddr_voltage = 1; ++		break; ++	default: ++	case 1500: ++		pcu_ddr_voltage = 0; ++		break; ++	} ++	/* Set bits 0..2 */ ++	mchbar_write32(PCU_DDR_VOLTAGE, pcu_ddr_voltage); ++} ++ ++static void program_scheduler(struct sysinfo *ctrl) ++{ ++	/* ++	 * ZQ calibration needs to be serialized for LPDDR3. Otherwise, ++	 * the processor issues LPDDR3 ZQ calibration in parallel when ++	 * exiting Package C7 or deeper. 
This causes problems for dual ++	 * and quad die packages since all ranks share the same ZQ pin. ++	 * ++	 * Erratum HSM94: LPDDR3 ZQ Calibration Following Deep Package ++	 * C-state Exit May Lead to Unpredictable System Behavior ++	 */ ++	const union mcscheds_cbit_reg mcscheds_cbit = { ++		.dis_write_gap = 1, ++		.dis_odt       = is_hsw_ult() && !(ctrl->lpddr && ctrl->lpddr_dram_odt), ++		.serialize_zq  = ctrl->lpddr, ++	}; ++	mchbar_write32(MCSCHEDS_CBIT, mcscheds_cbit.raw); ++	mchbar_write32(MCMNTS_SC_WDBWM, 0x553c3038); ++	if (ctrl->lpddr) { ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!does_ch_exist(ctrl, channel)) ++				continue; ++ ++			union mcmain_command_rate_limit_reg cmd_rate_limit = { ++				.raw = mchbar_read32(COMMAND_RATE_LIMIT_ch(channel)), ++			}; ++			cmd_rate_limit.enable_cmd_limit = 1; ++			cmd_rate_limit.cmd_rate_limit   = 3; ++			mchbar_write32(COMMAND_RATE_LIMIT_ch(channel), cmd_rate_limit.raw); ++		} ++	} ++} ++ ++static uint8_t biggest_channel(const struct sysinfo *const ctrl) ++{ ++	_Static_assert(NUM_CHANNELS == 2, "Code assumes exactly two channels"); ++	return !!(ctrl->channel_size_mb[0] < ctrl->channel_size_mb[1]); ++} ++ ++static void dram_zones(struct sysinfo *ctrl) ++{ ++	/** TODO: Activate channel hash here, if enabled **/ ++	const uint8_t biggest = biggest_channel(ctrl); ++	const uint8_t smaller = !biggest; ++ ++	/** TODO: Use stacked mode if Memory Trace is enabled **/ ++	const union mad_chnl_reg mad_channel = { ++		.ch_a       = biggest, ++		.ch_b       = smaller, ++		.ch_c       = 2, ++		.lpddr_mode = ctrl->lpddr, ++	}; ++	mchbar_write32(MAD_CHNL, mad_channel.raw); ++ ++	const uint8_t channel_b_zone_size = ctrl->channel_size_mb[smaller] / 256; ++	const union mad_zr_reg mad_zr = { ++		.ch_b_double = channel_b_zone_size * 2, ++		.ch_b_single = channel_b_zone_size, ++	}; ++	mchbar_write32(MAD_ZR, mad_zr.raw); ++} ++ ++static uint8_t biggest_dimm(const struct raminit_dimm_info *dimms) ++{ ++	
_Static_assert(NUM_SLOTS <= 2, "Code assumes at most two DIMMs per channel."); ++	if (NUM_SLOTS == 1) ++		return 0; ++ ++	return !!(dimms[0].data.size_mb < dimms[1].data.size_mb); ++} ++ ++static void dram_dimm_mapping(struct sysinfo *ctrl) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) { ++			const union mad_dimm_reg mad_dimm = { ++				.rank_interleave = 1, ++				.enh_interleave  = 1, ++			}; ++			mchbar_write32(MAD_DIMM(channel), mad_dimm.raw); ++			continue; ++		} ++		const uint8_t biggest = biggest_dimm(ctrl->dimms[channel]); ++		const uint8_t smaller = !biggest; ++		const struct dimm_attr_ddr3_st *dimm_a = &ctrl->dimms[channel][biggest].data; ++		const struct dimm_attr_ddr3_st *dimm_b = &ctrl->dimms[channel][smaller].data; ++		union mad_dimm_reg mad_dimm = { ++			.dimm_a_size     = dimm_a->size_mb / 256, ++			.dimm_b_size     = dimm_b->size_mb / 256, ++			.dimm_a_sel      = biggest, ++			.dimm_a_ranks    = dimm_a->ranks == 2, ++			.dimm_b_ranks    = dimm_b->ranks == 2, ++			.dimm_a_width    = dimm_a->width == 16, ++			.dimm_b_width    = dimm_b->width == 16, ++			.rank_interleave = 1, ++			.enh_interleave  = 1, ++			.ecc_mode        = 0,	/* Do not enable ECC yet */ ++		}; ++		if (is_hsw_ult()) ++			mad_dimm.dimm_b_width = mad_dimm.dimm_a_width; ++ ++		mchbar_write32(MAD_DIMM(channel), mad_dimm.raw); ++		if (ctrl->lpddr) ++			die("%s: Missing LPDDR support (LPDDR_MR_PARAMS)\n", __func__); ++	} ++} ++ ++enum raminit_status configure_mc(struct sysinfo *ctrl) ++{ ++	const uint16_t vccio_mv = 1000; ++	const uint16_t vsshi_mv = ctrl->vdd_mv - 950; ++	const bool dis_odt_static = is_hsw_ult(); /* Disable static ODT legs on ULT */ ++	const bool vddhi = ctrl->vdd_mv > 1350; ++ ++	program_misc_control(ctrl); ++	program_mrc_revision(); ++	program_ranks_used(ctrl); ++	program_ddr_data(ctrl, dis_odt_static, vddhi); ++	program_vsshi_control(ctrl, vsshi_mv); ++	program_dimm_vref(ctrl, vccio_mv, vddhi); ++	
program_ddr_ca(ctrl, vddhi); ++	program_rcomp_vref(ctrl, dis_odt_static); ++	program_slew_rates(ctrl, vddhi); ++	program_vsshi(ctrl, vccio_mv, vsshi_mv); ++	program_misc(ctrl); ++	program_ls_comp(ctrl); ++	enable_2x_refresh(ctrl); ++	set_pcu_ddr_voltage(ctrl->vdd_mv); ++	configure_timings(ctrl); ++	configure_refresh(ctrl); ++	program_scheduler(ctrl); ++	dram_zones(ctrl); ++	dram_dimm_mapping(ctrl); ++ ++	return RAMINIT_STATUS_SUCCESS; ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 5f2be980d4..3a773cfa19 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -23,6 +23,7 @@ static const struct task_entry cold_boot[] = { + 	{ collect_spd_info,                                       true, "PROCSPD",    }, + 	{ initialise_mpll,                                        true, "INITMPLL",   }, + 	{ convert_timings,                                        true, "CONVTIM",    }, ++	{ configure_mc,                                           true, "CONFMC",     }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +@@ -54,6 +55,7 @@ static void initialize_ctrl(struct sysinfo *ctrl) +  + 	ctrl->cpu = cpu_get_cpuid(); + 	ctrl->stepping = get_stepping(ctrl->cpu); ++	ctrl->vdd_mv = is_hsw_ult() ? 1350 : 1500; /** FIXME: Hardcoded, does it matter? 
**/ + 	ctrl->dq_pins_interleaved = cfg->dq_pins_interleaved; + 	ctrl->bootmode = bootmode; + } +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index 01e5ed1bd6..aa86b9aa39 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -3,15 +3,40 @@ + #ifndef HASWELL_RAMINIT_NATIVE_H + #define HASWELL_RAMINIT_NATIVE_H +  ++#include <assert.h> + #include <device/dram/ddr3.h> + #include <northbridge/intel/haswell/haswell.h> ++#include <string.h> ++#include <types.h> ++ ++#include "reg_structs.h" +  + #define SPD_LEN 256 +  ++/* Each channel has 4 ranks, spread across 2 slots */ ++#define NUM_SLOTRANKS		4 ++ ++#define NUM_GROUPS		2 ++ + /* 8 data lanes + 1 ECC lane */ + #define NUM_LANES		9 + #define NUM_LANES_NO_ECC	8 +  ++#define COMP_INT		10 ++ ++/* Always use 12 legs for emphasis (not trained) */ ++#define TXEQFULLDRV		(3 << 4) ++ ++enum command_training_iteration { ++	CT_ITERATION_CLOCK = 0, ++	CT_ITERATION_CMD_NORTH, ++	CT_ITERATION_CMD_SOUTH, ++	CT_ITERATION_CKE, ++	CT_ITERATION_CTL, ++	CT_ITERATION_CMD_VREF, ++	MAX_CT_ITERATION, ++}; ++ + enum raminit_boot_mode { + 	BOOTMODE_COLD, + 	BOOTMODE_WARM, +@@ -57,6 +82,9 @@ struct sysinfo { + 	 * LPDDR-specific functions have stubs which will halt upon execution. 
+ 	 */ + 	bool lpddr; ++	bool lpddr_dram_odt; ++	uint8_t lpddr_cke_rank_map[NUM_CHANNELS]; ++	uint8_t dq_byte_map[NUM_CHANNELS][MAX_CT_ITERATION][2]; +  + 	struct raminit_dimm_info dimms[NUM_CHANNELS][NUM_SLOTS]; + 	union dimm_flags_ddr3_st flags; +@@ -93,16 +121,89 @@ struct sysinfo { + 	uint32_t mem_clock_mhz; + 	uint32_t mem_clock_fs;		/* Memory clock period in femtoseconds */ + 	uint32_t qclkps;		/* Quadrature clock period in picoseconds */ ++ ++	uint16_t vdd_mv; ++ ++	union ddr_scram_misc_control_reg misc_control_0; ++ ++	union ddr_comp_ctl_0_reg comp_ctl_0; ++	union ddr_comp_ctl_1_reg comp_ctl_1; ++ ++	union ddr_data_vref_adjust_reg dimm_vref; ++ ++	uint32_t data_offset_train[NUM_CHANNELS][NUM_LANES]; ++	uint32_t data_offset_comp[NUM_CHANNELS][NUM_LANES]; ++ ++	uint32_t dq_control_0[NUM_CHANNELS]; ++	uint32_t dq_control_1[NUM_CHANNELS][NUM_LANES]; ++	uint32_t dq_control_2[NUM_CHANNELS][NUM_LANES]; ++ ++	uint16_t tx_dq[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; ++	uint16_t txdqs[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; ++	uint8_t  tx_eq[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; ++ ++	uint16_t rcven[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; ++	uint8_t  rx_eq[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; ++	uint8_t rxdqsp[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; ++	uint8_t rxdqsn[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; ++	int8_t  rxvref[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; ++ ++	uint8_t clk_pi_code[NUM_CHANNELS][NUM_SLOTRANKS]; ++	uint8_t ctl_pi_code[NUM_CHANNELS][NUM_SLOTRANKS]; ++	uint8_t cke_pi_code[NUM_CHANNELS][NUM_SLOTRANKS]; ++ ++	uint8_t cke_cmd_pi_code[NUM_CHANNELS][NUM_GROUPS]; ++	uint8_t cmd_north_pi_code[NUM_CHANNELS][NUM_GROUPS]; ++	uint8_t cmd_south_pi_code[NUM_CHANNELS][NUM_GROUPS]; + }; +  ++static inline bool is_hsw_ult(void) ++{ ++	return CONFIG(INTEL_LYNXPOINT_LP); ++} ++ ++static inline bool rank_in_mask(uint8_t rank, uint8_t rankmask) ++{ ++	assert(rank < NUM_SLOTRANKS); ++	return !!(BIT(rank) & rankmask); ++} ++ ++static inline bool 
does_ch_exist(const struct sysinfo *ctrl, uint8_t channel) ++{ ++	return !!ctrl->dpc[channel]; ++} ++ ++static inline bool does_rank_exist(const struct sysinfo *ctrl, uint8_t rank) ++{ ++	return rank_in_mask(rank, ctrl->rankmap[0] | ctrl->rankmap[1]); ++} ++ ++static inline bool rank_in_ch(const struct sysinfo *ctrl, uint8_t rank, uint8_t channel) ++{ ++	assert(channel < NUM_CHANNELS); ++	return rank_in_mask(rank, ctrl->rankmap[channel]); ++} ++ ++/** TODO: Handling of data_offset_train could be improved, also coupled with reg updates **/ ++static inline void clear_data_offset_train_all(struct sysinfo *ctrl) ++{ ++	memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train)); ++} ++ + void raminit_main(enum raminit_boot_mode bootmode); +  + enum raminit_status collect_spd_info(struct sysinfo *ctrl); + enum raminit_status initialise_mpll(struct sysinfo *ctrl); + enum raminit_status convert_timings(struct sysinfo *ctrl); ++enum raminit_status configure_mc(struct sysinfo *ctrl); ++ ++void configure_timings(struct sysinfo *ctrl); ++void configure_refresh(struct sysinfo *ctrl); +  + enum raminit_status wait_for_first_rcomp(void); +  ++uint8_t get_rx_bias(const struct sysinfo *ctrl); ++ + uint8_t get_tCWL(uint32_t mem_clock_mhz); + uint32_t get_tREFI(uint32_t mem_clock_mhz); + uint32_t get_tXP(uint32_t mem_clock_mhz); +diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +new file mode 100644 +index 0000000000..d11cda4b3d +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +@@ -0,0 +1,405 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#ifndef HASWELL_RAMINIT_REG_STRUCTS_H ++#define HASWELL_RAMINIT_REG_STRUCTS_H ++ ++union ddr_data_rx_train_rank_reg { ++	struct __packed { ++		uint32_t rcven : 9; // Bits  8:0 ++		uint32_t dqs_p : 6; // Bits 14:9 ++		uint32_t rx_eq : 5; // Bits 19:15 ++		uint32_t dqs_n : 6; // Bits 25:20 ++		int32_t  vref  : 6; // 
Bits 31:26 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_data_tx_train_rank_reg { ++	struct __packed { ++		uint32_t dq_delay  : 9; // Bits  8:0 ++		uint32_t dqs_delay : 9; // Bits 17:9 ++		uint32_t           : 2; // Bits 19:18 ++		uint32_t tx_eq     : 6; // Bits 25:20 ++		uint32_t           : 6; // Bits 31:26 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_data_control_0_reg { ++	struct __packed { ++		uint32_t rx_training_mode      : 1; // Bits  0:0 ++		uint32_t wl_training_mode      : 1; // Bits  1:1 ++		uint32_t rl_training_mode      : 1; // Bits  2:2 ++		uint32_t samp_train_mode       : 1; // Bits  3:3 ++		uint32_t tx_on                 : 1; // Bits  4:4 ++		uint32_t rf_on                 : 1; // Bits  5:5 ++		uint32_t rx_pi_on              : 1; // Bits  6:6 ++		uint32_t tx_pi_on              : 1; // Bits  7:7 ++		uint32_t internal_clocks_on    : 1; // Bits  8:8 ++		uint32_t repeater_clocks_on    : 1; // Bits  9:9 ++		uint32_t tx_disable            : 1; // Bits 10:10 ++		uint32_t rx_disable            : 1; // Bits 11:11 ++		uint32_t tx_long               : 1; // Bits 12:12 ++		uint32_t rx_dqs_ctle           : 2; // Bits 14:13 ++		uint32_t rx_read_pointer       : 3; // Bits 17:15 ++		uint32_t driver_segment_enable : 1; // Bits 18:18 ++		uint32_t data_vccddq_hi        : 1; // Bits 19:19 ++		uint32_t read_rf_rd            : 1; // Bits 20:20 ++		uint32_t read_rf_wr            : 1; // Bits 21:21 ++		uint32_t read_rf_rank          : 2; // Bits 23:22 ++		uint32_t force_odt_on          : 1; // Bits 24:24 ++		uint32_t odt_samp_off          : 1; // Bits 25:25 ++		uint32_t disable_odt_static    : 1; // Bits 26:26 ++		uint32_t ddr_cr_force_odt_on   : 1; // Bits 27:27 ++		uint32_t lpddr_mode            : 1; // Bits 28:28 ++		uint32_t en_read_preamble      : 1; // Bits 29:29 ++		uint32_t odt_samp_extend_en    : 1; // Bits 30:30 ++		uint32_t early_rleak_en        : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_data_control_1_reg { ++	struct __packed { ++		int32_t  ref_pi 
              : 4; // Bits 3:0 ++		uint32_t dll_mask             : 2; // Bits 5:4 ++		uint32_t dll_weaklock         : 1; // Bits 6:6 ++		uint32_t sdll_segment_disable : 3; // Bits 9:7 ++		uint32_t rx_bias_ctl          : 3; // Bits 12:10 ++		int32_t  odt_delay            : 4; // Bits 16:13 ++		uint32_t odt_duration         : 3; // Bits 19:17 ++		int32_t  sense_amp_delay      : 4; // Bits 23:20 ++		uint32_t sense_amp_duration   : 3; // Bits 26:24 ++		uint32_t burst_end_odt_delay  : 3; // Bits 29:27   *** TODO: Check Broadwell *** ++		uint32_t lpddr_long_odt_en    : 1; // Bits 30:30 ++		uint32_t                      : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++/* NOTE: Bits 31:19 are only valid for Broadwell onwards */ ++union ddr_data_control_2_reg { ++	struct __packed { ++		uint32_t rx_stagger_ctl    : 5; // Bits  4:0 ++		uint32_t force_bias_on     : 1; // Bits  5:5 ++		uint32_t force_rx_on       : 1; // Bits  6:6 ++		uint32_t leaker_comp       : 2; // Bits  8:7 ++		uint32_t rx_dqs_amp_offset : 4; // Bits 12:9 ++		uint32_t rx_clk_stg_num    : 5; // Bits 17:13 ++		uint32_t wl_long_delay     : 1; // Bits 18:18 ++		uint32_t enable_vref_pwrdn : 1; // Bits 19:19 ++		uint32_t ddr4_mode         : 1; // Bits 20:20 ++		uint32_t en_vddq_odt       : 1; // Bits 21:21 ++		uint32_t en_vtt_odt        : 1; // Bits 22:22 ++		uint32_t en_const_z_eq_tx  : 1; // Bits 23:23 ++		uint32_t tx_eq_dis         : 1; // Bits 24:24 ++		uint32_t rx_vref_prog_mfc  : 1; // Bits 25:25 ++		uint32_t cben              : 3; // Bits 28:26 ++		uint32_t tx_deskew_disable : 1; // Bits 29:29 ++		uint32_t rx_deskew_disable : 1; // Bits 30:30 ++		uint32_t dq_slew_dly_byp   : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_comp_data_comp_1_reg { ++	struct __packed { ++		uint32_t rcomp_odt_up   : 6; // Bits  5:0 ++		uint32_t                : 3; // Bits  8:6 ++		uint32_t rcomp_odt_down : 6; // Bits 14:9 ++		uint32_t                : 1; // Bits 15:15 ++		uint32_t panic_drv_down : 6; // Bits 
21:16 ++		uint32_t panic_drv_up   : 6; // Bits 27:22 ++		uint32_t ls_comp        : 3; // Bits 30:28 ++		uint32_t                : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_comp_ctl_0_reg { ++	struct __packed { ++		uint32_t                    : 3; // Bits  2:0 ++		uint32_t disable_odt_static : 1; // Bits  3:3 ++		uint32_t odt_up_down_off    : 6; // Bits  9:4 ++		uint32_t fixed_odt_offset   : 1; // Bits 10:10 ++		int32_t  dq_drv_vref        : 4; // Bits 14:11 ++		int32_t  dq_odt_vref        : 5; // Bits 19:15 ++		int32_t  cmd_drv_vref       : 4; // Bits 23:20 ++		int32_t  ctl_drv_vref       : 4; // Bits 27:24 ++		int32_t  clk_drv_vref       : 4; // Bits 31:28 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_comp_ctl_1_reg { ++	struct __packed { ++		uint32_t dq_scomp       : 5; // Bits  4:0 ++		uint32_t cmd_scomp      : 5; // Bits  9:5 ++		uint32_t ctl_scomp      : 5; // Bits 14:10 ++		uint32_t clk_scomp      : 5; // Bits 19:15 ++		uint32_t tco_cmd_offset : 4; // Bits 23:20 ++		uint32_t comp_clk_on    : 1; // Bits 24:24 ++		uint32_t vccddq_hi      : 1; // Bits 25:25 ++		uint32_t                : 3; // Bits 28:26 ++		uint32_t dis_quick_comp : 1; // Bits 29:29 ++		uint32_t sin_step       : 1; // Bits 30:30 ++		uint32_t sin_step_adv   : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_data_vref_adjust_reg { ++	struct __packed { ++		int32_t  ca_vref_ctrl     : 7;// Bits  6:0 ++		int32_t  ch1_vref_ctrl    : 7;// Bits 13:7 ++		int32_t  ch0_vref_ctrl    : 7;// Bits 20:14 ++		uint32_t en_dimm_vref_ca  : 1;// Bits 21:21 ++		uint32_t en_dimm_vref_ch1 : 1;// Bits 22:22 ++		uint32_t en_dimm_vref_ch0 : 1;// Bits 23:23 ++		uint32_t hi_z_timer_ctrl  : 2;// Bits 25:24 ++		uint32_t vccddq_hi_qnnn_h : 1;// Bits 26:26 ++		uint32_t                  : 2;// Bits 28:27 ++		uint32_t ca_slow_bw       : 1;// Bits 29:29 ++		uint32_t ch0_slow_bw      : 1;// Bits 30:30 ++		uint32_t ch1_slow_bw      : 1;// Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union 
ddr_data_vref_control_reg { ++	struct __packed { ++		uint32_t hi_bw_divider   : 2; // Bits  1:0 ++		uint32_t lo_bw_divider   : 2; // Bits  3:2 ++		uint32_t sample_divider  : 3; // Bits  6:4 ++		uint32_t open_loop       : 1; // Bits  7:7 ++		uint32_t slow_bw_error   : 2; // Bits  9:8 ++		uint32_t hi_bw_enable    : 1; // Bits 10:10 ++		uint32_t                 : 1; // Bits 11:11 ++		uint32_t vt_slope_b      : 3; // Bits 14:12 ++		uint32_t vt_slope_a      : 3; // Bits 17:15 ++		uint32_t vt_offset       : 3; // Bits 20:18 ++		uint32_t sel_code        : 3; // Bits 23:21 ++		uint32_t output_code     : 8; // Bits 31:24 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_comp_vsshi_reg { ++	struct __packed { ++		uint32_t panic_drv_down_vref : 6; // Bits  5:0 ++		uint32_t panic_drv_up_vref   : 6; // Bits 11:6 ++		uint32_t vt_offset           : 5; // Bits 16:12 ++		uint32_t vt_slope_a          : 3; // Bits 19:17 ++		uint32_t vt_slope_b          : 3; // Bits 22:20 ++		uint32_t                     : 9; // Bits 31:23 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_comp_vsshi_control_reg { ++	struct __packed { ++		uint32_t vsshi_target    : 6; // Bits  5:0 ++		uint32_t hi_bw_divider   : 2; // Bits  7:6 ++		uint32_t lo_bw_divider   : 2; // Bits  9:8 ++		uint32_t sample_divider  : 3; // Bits 12:10 ++		uint32_t open_loop       : 1; // Bits 13:13 ++		uint32_t bw_error        : 2; // Bits 15:14 ++		uint32_t panic_driver_en : 1; // Bits 16:16 ++		uint32_t                 : 1; // Bits 17:17 ++		uint32_t panic_voltage   : 4; // Bits 21:18 ++		uint32_t gain_boost      : 1; // Bits 22:22 ++		uint32_t sel_code        : 1; // Bits 23:23 ++		uint32_t output_code     : 8; // Bits 31:24 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_clk_controls_reg { ++	struct __packed { ++		uint32_t ref_pi             : 4; // Bits  3:0 ++		uint32_t dll_mask           : 2; // Bits  5:4 ++		uint32_t                    : 1; // Bits  6:6 ++		uint32_t tx_on              : 1; // Bits  7:7 ++		uint32_t internal_clocks_on : 1; // 
Bits  8:8 ++		uint32_t repeater_clocks_on : 1; // Bits  9:9 ++		uint32_t io_lb_ctl          : 2; // Bits 11:10 ++		uint32_t odt_mode           : 1; // Bits 12:12 ++		uint32_t                    : 8; // Bits 20:13 ++		uint32_t rx_vref            : 6; // Bits 26:21 ++		uint32_t vccddq_hi          : 1; // Bits 27:27 ++		uint32_t dll_weaklock       : 1; // Bits 28:28 ++		uint32_t lpddr_mode         : 1; // Bits 29:29 ++		uint32_t                    : 2; // Bits 31:30 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_cmd_controls_reg { ++	struct __packed { ++		int32_t  ref_pi             : 4; // Bits  3:0 ++		uint32_t dll_mask           : 2; // Bits  5:4 ++		uint32_t                    : 1; // Bits  6:6 ++		uint32_t tx_on              : 1; // Bits  7:7 ++		uint32_t internal_clocks_on : 1; // Bits  8:8 ++		uint32_t repeater_clocks_on : 1; // Bits  9:9 ++		uint32_t io_lb_ctl          : 2; // Bits 11:10 ++		uint32_t odt_mode           : 1; // Bits 12:12 ++		uint32_t cmd_tx_eq          : 2; // Bits 14:13 ++		uint32_t early_weak_drive   : 2; // Bits 16:15 ++		uint32_t                    : 4; // Bits 20:17 ++		int32_t  rx_vref            : 6; // Bits 26:21 ++		uint32_t vccddq_hi          : 1; // Bits 27:27 ++		uint32_t dll_weaklock       : 1; // Bits 28:28 ++		uint32_t lpddr_mode         : 1; // Bits 29:29 ++		uint32_t lpddr_ca_a_dis     : 1; // Bits 30:30 ++		uint32_t lpddr_ca_b_dis     : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++/* Same register definition for CKE and CTL fubs */ ++union ddr_cke_ctl_controls_reg { ++	struct __packed { ++		int32_t  ref_pi             : 4; // Bits  3:0 ++		uint32_t dll_mask           : 2; // Bits  5:4 ++		uint32_t                    : 1; // Bits  6:6 ++		uint32_t tx_on              : 1; // Bits  7:7 ++		uint32_t internal_clocks_on : 1; // Bits  8:8 ++		uint32_t repeater_clocks_on : 1; // Bits  9:9 ++		uint32_t io_lb_ctl          : 2; // Bits 11:10 ++		uint32_t odt_mode           : 1; // Bits 12:12 ++		uint32_t cmd_tx_eq          : 2; // Bits 
14:13 ++		uint32_t early_weak_drive   : 2; // Bits 16:15 ++		uint32_t ctl_tx_eq          : 2; // Bits 18:17 ++		uint32_t ctl_sr_drv         : 2; // Bits 20:19 ++		int32_t  rx_vref            : 6; // Bits 26:21 ++		uint32_t vccddq_hi          : 1; // Bits 27:27 ++		uint32_t dll_weaklock       : 1; // Bits 28:28 ++		uint32_t lpddr_mode         : 1; // Bits 29:29 ++		uint32_t la_drv_en_ovrd     : 1; // Bits 30:30 ++		uint32_t lpddr_ca_a_dis     : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union ddr_scram_misc_control_reg { ++	struct __packed { ++		uint32_t wl_wake_cycles       :  2; // Bits  1:0 ++		uint32_t wl_sleep_cycles      :  3; // Bits  4:2 ++		uint32_t force_comp_update    :  1; // Bits  5:5 ++		uint32_t weaklock_latency     :  4; // Bits  9:6 ++		uint32_t ddr_no_ch_interleave :  1; // Bits 10:10 ++		uint32_t lpddr_mode           :  1; // Bits 11:11 ++		uint32_t cke_mapping_ch0      :  4; // Bits 15:12 ++		uint32_t cke_mapping_ch1      :  4; // Bits 19:16 ++		uint32_t                      : 12; // Bits 31:20 ++	}; ++	uint32_t raw; ++}; ++ ++union mcscheds_cbit_reg { ++	struct __packed { ++		uint32_t dis_opp_cas    : 1; // Bits  0:0 ++		uint32_t dis_opp_is_cas : 1; // Bits  1:1 ++		uint32_t dis_opp_ras    : 1; // Bits  2:2 ++		uint32_t dis_opp_is_ras : 1; // Bits  3:3 ++		uint32_t dis_1c_byp     : 1; // Bits  4:4 ++		uint32_t dis_2c_byp     : 1; // Bits  5:5 ++		uint32_t dis_deprd_opt  : 1; // Bits  6:6 ++		uint32_t dis_pt_it      : 1; // Bits  7:7 ++		uint32_t dis_prcnt_ring : 1; // Bits  8:8 ++		uint32_t dis_prcnt_sa   : 1; // Bits  9:9 ++		uint32_t dis_blkr_ph    : 1; // Bits 10:10 ++		uint32_t dis_blkr_pe    : 1; // Bits 11:11 ++		uint32_t dis_blkr_pm    : 1; // Bits 12:12 ++		uint32_t dis_odt        : 1; // Bits 13:13 ++		uint32_t oe_always_off  : 1; // Bits 14:14 ++		uint32_t                : 1; // Bits 15:15 ++		uint32_t dis_aom        : 1; // Bits 16:16 ++		uint32_t block_rpq      : 1; // Bits 17:17 ++		uint32_t block_wpq      : 1; // Bits 18:18 
++		uint32_t invert_align   : 1; // Bits 19:19 ++		uint32_t dis_write_gap  : 1; // Bits 20:20 ++		uint32_t dis_zq         : 1; // Bits 21:21 ++		uint32_t dis_tt         : 1; // Bits 22:22 ++		uint32_t dis_opp_ref    : 1; // Bits 23:23 ++		uint32_t long_zq        : 1; // Bits 24:24 ++		uint32_t dis_srx_zq     : 1; // Bits 25:25 ++		uint32_t serialize_zq   : 1; // Bits 26:26 ++		uint32_t zq_fast_exec   : 1; // Bits 27:27 ++		uint32_t dis_drive_nop  : 1; // Bits 28:28 ++		uint32_t pres_wdb_ent   : 1; // Bits 29:29 ++		uint32_t dis_clk_gate   : 1; // Bits 30:30 ++		uint32_t                : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union mcmain_command_rate_limit_reg { ++	struct __packed { ++		uint32_t enable_cmd_limit :  1; // Bits  0:0 ++		uint32_t cmd_rate_limit   :  3; // Bits  3:1 ++		uint32_t reset_on_command :  4; // Bits  7:4 ++		uint32_t reset_delay      :  4; // Bits 11:8 ++		uint32_t ck_to_cke_delay  :  2; // Bits 13:12 ++		uint32_t                  : 17; // Bits 30:14 ++		uint32_t init_mrw_2n_cs   :  1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union mad_chnl_reg { ++	struct __packed { ++		uint32_t ch_a           :  2; // Bits  1:0 ++		uint32_t ch_b           :  2; // Bits  3:2 ++		uint32_t ch_c           :  2; // Bits  5:4 ++		uint32_t stacked_mode   :  1; // Bits  6:6 ++		uint32_t stkd_mode_bits :  3; // Bits  9:7 ++		uint32_t lpddr_mode     :  1; // Bits 10:10 ++		uint32_t                : 21; // Bits 31:11 ++	}; ++	uint32_t raw; ++}; ++ ++union mad_dimm_reg { ++	struct __packed { ++		uint32_t dimm_a_size     :  8;  // Bits  7:0 ++		uint32_t dimm_b_size     :  8;  // Bits 15:8 ++		uint32_t dimm_a_sel      :  1;  // Bits 16:16 ++		uint32_t dimm_a_ranks    :  1;  // Bits 17:17 ++		uint32_t dimm_b_ranks    :  1;  // Bits 18:18 ++		uint32_t dimm_a_width    :  1;  // Bits 19:19 ++		uint32_t dimm_b_width    :  1;  // Bits 20:20 ++		uint32_t rank_interleave :  1;  // Bits 21:21 ++		uint32_t enh_interleave  :  1;  // Bits 22:22 ++		uint32_t          
       :  1;  // Bits 23:23 ++		uint32_t ecc_mode        :  2;  // Bits 25:24 ++		uint32_t hori_mode       :  1;  // Bits 26:26 ++		uint32_t hori_address    :  3;  // Bits 29:27 ++		uint32_t                 :  2;  // Bits 31:30 ++	}; ++	uint32_t raw; ++}; ++ ++union mad_zr_reg { ++	struct __packed { ++		uint32_t             : 16; // Bits 15:0 ++		uint32_t ch_b_double :  8; // Bits 23:16 ++		uint32_t ch_b_single :  8; // Bits 31:24 ++	}; ++	uint32_t raw; ++}; ++ ++/* Same definition for P_COMP, M_COMP, D_COMP */ ++union pcu_comp_reg { ++	struct __packed { ++		uint32_t comp_disable  :  1; // Bits  0:0 ++		uint32_t comp_interval :  4; // Bits  4:1 ++		uint32_t               :  3; // Bits  7:5 ++		uint32_t comp_force    :  1; // Bits  8:8 ++		uint32_t               : 23; // Bits 31:9 ++	}; ++	uint32_t raw; ++}; ++ ++#endif +diff --git a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c +new file mode 100644 +index 0000000000..a9d960f31b +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c +@@ -0,0 +1,13 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include "raminit_native.h" ++ ++void configure_timings(struct sysinfo *ctrl) ++{ ++	/** TODO: Stub **/ ++} ++ ++void configure_refresh(struct sysinfo *ctrl) ++{ ++	/** TODO: Stub **/ ++} +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 45f8174995..4c3f399b5d 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -7,9 +7,98 @@ + #define NUM_CHANNELS	2 + #define NUM_SLOTS	2 +  ++/* Indexed register helper macros */ ++#define _DDRIO_C_R_B(r, ch, rank, byte)	((r) + 0x100 * (ch) + 0x4 * (rank) + 0x200 * (byte)) ++#define _MCMAIN_C_X(r, ch, x)		((r) + 0x400 * (ch) + 0x4 * (x)) ++#define _MCMAIN_C(r, ch)		((r) + 0x400 * (ch)) ++ + /* Register definitions */ ++ ++/* DDR DATA 
per-channel per-bytelane */ ++#define DQ_CONTROL_2(ch, byte)			_DDRIO_C_R_B(0x0064, ch, 0, byte) ++ ++/* DDR CKE per-channel */ ++#define DDR_CKE_ch_CMD_COMP_OFFSET(ch)		_DDRIO_C_R_B(0x1204, ch, 0, 0) ++#define DDR_CKE_ch_CMD_PI_CODING(ch)		_DDRIO_C_R_B(0x1208, ch, 0, 0) ++ ++#define DDR_CKE_ch_CTL_CONTROLS(ch)		_DDRIO_C_R_B(0x121c, ch, 0, 0) ++#define DDR_CKE_ch_CTL_RANKS_USED(ch)		_DDRIO_C_R_B(0x1220, ch, 0, 0) ++ ++/* DDR CTL per-channel */ ++#define DDR_CTL_ch_CTL_CONTROLS(ch)		_DDRIO_C_R_B(0x1c1c, ch, 0, 0) ++#define DDR_CTL_ch_CTL_RANKS_USED(ch)		_DDRIO_C_R_B(0x1c20, ch, 0, 0) ++ ++/* DDR CLK per-channel */ ++#define DDR_CLK_ch_RANKS_USED(ch)		_DDRIO_C_R_B(0x1800, ch, 0, 0) ++#define DDR_CLK_ch_COMP_OFFSET(ch)		_DDRIO_C_R_B(0x1808, ch, 0, 0) ++#define DDR_CLK_ch_PI_CODING(ch)		_DDRIO_C_R_B(0x180c, ch, 0, 0) ++#define DDR_CLK_ch_CONTROLS(ch)			_DDRIO_C_R_B(0x1810, ch, 0, 0) ++ ++/* DDR Scrambler */ ++#define DDR_SCRAMBLE_ch(ch)			(0x2000 + 4 * (ch)) ++#define DDR_SCRAM_MISC_CONTROL			0x2008 ++ ++/* DDR CMDN/CMDS per-channel (writes go to both CMDN and CMDS fubs) */ ++#define DDR_CMD_ch_COMP_OFFSET(ch)		_DDRIO_C_R_B(0x3204, ch, 0, 0) ++#define DDR_CMD_ch_PI_CODING(ch)		_DDRIO_C_R_B(0x3208, ch, 0, 0) ++#define DDR_CMD_ch_CONTROLS(ch)			_DDRIO_C_R_B(0x320c, ch, 0, 0) ++ ++/* DDR CKE/CTL per-channel (writes go to both CKE and CTL fubs) */ ++#define DDR_CKE_CTL_ch_CTL_COMP_OFFSET(ch)	_DDRIO_C_R_B(0x3414, ch, 0, 0) ++#define DDR_CKE_CTL_ch_CTL_PI_CODING(ch)	_DDRIO_C_R_B(0x3418, ch, 0, 0) ++ ++/* DDR DATA broadcast */ ++#define DDR_DATA_RX_TRAIN_RANK(rank)		_DDRIO_C_R_B(0x3600, 0, rank, 0) ++#define DDR_DATA_RX_PER_BIT_RANK(rank)		_DDRIO_C_R_B(0x3610, 0, rank, 0) ++#define DDR_DATA_TX_TRAIN_RANK(rank)		_DDRIO_C_R_B(0x3620, 0, rank, 0) ++#define DDR_DATA_TX_PER_BIT_RANK(rank)		_DDRIO_C_R_B(0x3630, 0, rank, 0) ++ ++#define DDR_DATA_RCOMP_DATA_1			0x3644 ++#define DDR_DATA_TX_XTALK			0x3648 ++#define DDR_DATA_RX_OFFSET_VDQ			0x364c ++#define DDR_DATA_OFFSET_COMP			
0x365c ++#define DDR_DATA_CONTROL_1			0x3660 ++ ++#define DDR_DATA_OFFSET_TRAIN			0x3670 ++#define DDR_DATA_CONTROL_0			0x3674 ++#define DDR_DATA_VREF_ADJUST			0x3678 ++ ++/* DDR CMD broadcast */ ++#define DDR_CMD_COMP				0x3700 ++ ++/* DDR CKE/CTL broadcast */ ++#define DDR_CKE_CTL_COMP			0x3810 ++ ++/* DDR CLK broadcast */ ++#define DDR_CLK_COMP				0x3904 ++#define DDR_CLK_CONTROLS			0x3910 ++#define DDR_CLK_CB_STATUS			0x3918 ++ ++/* DDR COMP (global) */ ++#define DDR_COMP_DATA_COMP_1			0x3a04 ++#define DDR_COMP_CMD_COMP			0x3a08 ++#define DDR_COMP_CTL_COMP			0x3a0c ++#define DDR_COMP_CLK_COMP			0x3a10 ++#define DDR_COMP_CTL_0				0x3a14 ++#define DDR_COMP_CTL_1				0x3a18 ++#define DDR_COMP_VSSHI				0x3a1c ++#define DDR_COMP_OVERRIDE			0x3a20 ++#define DDR_COMP_VSSHI_CONTROL			0x3a24 ++ ++/* MCMAIN per-channel */ ++#define COMMAND_RATE_LIMIT_ch(ch)		_MCMAIN_C(0x4010, ch) ++ ++#define MC_INIT_STATE_ch(ch)			_MCMAIN_C(0x42a0, ch) ++ ++/* MCMAIN broadcast */ ++#define MCSCHEDS_CBIT		0x4c20 ++ ++#define MCMNTS_SC_WDBWM		0x4f8c ++ ++/* MCDECS */ + #define MAD_CHNL		0x5000 /* Address Decoder Channel Configuration */ + #define MAD_DIMM(ch)		(0x5004 + (ch) * 4) ++#define MAD_ZR			0x5014 + #define MC_INIT_STATE_G		0x5030 + #define MRC_REVISION		0x5034 /* MRC Revision */ +  +@@ -28,6 +117,8 @@ +  + #define PCU_DDR_PTM_CTL		0x5880 +  ++#define PCU_DDR_VOLTAGE		0x58a4 ++ + /* Some power MSRs are also represented in MCHBAR */ + #define MCH_PKG_POWER_LIMIT_LO	0x59a0 + #define MCH_PKG_POWER_LIMIT_HI	0x59a4 +@@ -48,6 +139,8 @@ + #define  MAILBOX_BIOS_CMD_FSM_MEASURE_INTVL	0x909 + #define  MAILBOX_BIOS_CMD_READ_PCH_POWER	0xa + #define  MAILBOX_BIOS_CMD_READ_PCH_POWER_EXT	0xb ++#define  MAILBOX_BIOS_CMD_READ_DDR_2X_REFRESH	0x17 ++#define  MAILBOX_BIOS_CMD_WRITE_DDR_2X_REFRESH	0x18 + #define  MAILBOX_BIOS_CMD_READ_C9C10_VOLTAGE	0x26 + #define  MAILBOX_BIOS_CMD_WRITE_C9C10_VOLTAGE	0x27 +  +@@ -66,6 +159,7 @@ + #define MC_BIOS_REQ		0x5e00 /* Memory frequency request register */ + 
#define MC_BIOS_DATA		0x5e04 /* Miscellaneous information for BIOS */ + #define SAPMCTL			0x5f00 ++#define M_COMP			0x5f08 +  + #define HDAUDRID		0x6008 + #define UMAGFXCTL		0x6020 +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch b/config/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch new file mode 100644 index 00000000..1b88f350 --- /dev/null +++ b/config/coreboot/haswell/patches/0014-haswell-NRI-Add-timings-refresh-programming.patch @@ -0,0 +1,541 @@ +From b64d728bfe7c8ee44af252338257e95d87864659 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 20:59:58 +0200 +Subject: [PATCH 14/26] haswell NRI: Add timings/refresh programming + +Program the registers with timing and refresh parameters. + +Change-Id: Id2ea339d2c9ea8b56c71d6e88ec76949653ff5c2 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../haswell/native_raminit/lookup_timings.c   | 102 ++++++++ + .../haswell/native_raminit/raminit_native.h   |  14 ++ + .../haswell/native_raminit/reg_structs.h      |  93 +++++++ + .../haswell/native_raminit/timings_refresh.c  | 233 +++++++++++++++++- + .../intel/haswell/registers/mchbar.h          |  12 + + 5 files changed, 452 insertions(+), 2 deletions(-) + +diff --git a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c +index 038686c844..afe2c615d2 100644 +--- a/src/northbridge/intel/haswell/native_raminit/lookup_timings.c ++++ b/src/northbridge/intel/haswell/native_raminit/lookup_timings.c +@@ -60,3 +60,105 @@ uint32_t get_tXP(const uint32_t mem_clock_mhz) + 	}; + 	return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut)); + } ++ ++static uint32_t get_lpddr_tCKE(const uint32_t mem_clock_mhz) ++{ ++	const struct timing_lookup lut[] = { ++		{  533,  4 }, ++		{  666,  5 }, ++		{ fmax,  6 }, ++	}; ++	return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut)); 
++} ++ ++static uint32_t get_ddr_tCKE(const uint32_t mem_clock_mhz) ++{ ++	const struct timing_lookup lut[] = { ++		{  533,  3 }, ++		{  800,  4 }, ++		{  933,  5 }, ++		{ 1200,  6 }, ++		{ fmax,  7 }, ++	}; ++	return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut)); ++} ++ ++uint32_t get_tCKE(const uint32_t mem_clock_mhz, const bool lpddr) ++{ ++	return lpddr ? get_lpddr_tCKE(mem_clock_mhz) : get_ddr_tCKE(mem_clock_mhz); ++} ++ ++uint32_t get_tXPDLL(const uint32_t mem_clock_mhz) ++{ ++	const struct timing_lookup lut[] = { ++		{  400, 10 }, ++		{  533, 13 }, ++		{  666, 16 }, ++		{  800, 20 }, ++		{  933, 23 }, ++		{ 1066, 26 }, ++		{ 1200, 29 }, ++		{ fmax, 32 }, ++	}; ++	return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut)); ++} ++ ++uint32_t get_tAONPD(const uint32_t mem_clock_mhz) ++{ ++	const struct timing_lookup lut[] = { ++		{  400,  4 }, ++		{  533,  5 }, ++		{  666,  6 }, ++		{  800,  7 }, /* SNB had 8 */ ++		{  933,  8 }, ++		{ 1066, 10 }, ++		{ 1200, 11 }, ++		{ fmax, 12 }, ++	}; ++	return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut)); ++} ++ ++uint32_t get_tMOD(const uint32_t mem_clock_mhz) ++{ ++	const struct timing_lookup lut[] = { ++		{  800, 12 }, ++		{  933, 14 }, ++		{ 1066, 16 }, ++		{ 1200, 18 }, ++		{ fmax, 20 }, ++	}; ++	return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut)); ++} ++ ++uint32_t get_tXS_offset(const uint32_t mem_clock_mhz) ++{ ++	return DIV_ROUND_UP(mem_clock_mhz, 100); ++} ++ ++static uint32_t get_lpddr_tZQOPER(const uint32_t mem_clock_mhz) ++{ ++	return (mem_clock_mhz * 360) / 1000; ++} ++ ++static uint32_t get_ddr_tZQOPER(const uint32_t mem_clock_mhz) ++{ ++	const struct timing_lookup lut[] = { ++		{  800, 256 }, ++		{  933, 299 }, ++		{ 1066, 342 }, ++		{ 1200, 384 }, ++		{ fmax, 427 }, ++	}; ++	return lookup_timing(mem_clock_mhz, lut, ARRAY_SIZE(lut)); ++} ++ ++/* tZQOPER defines the period required for ZQCL after SR exit */ ++uint32_t get_tZQOPER(const uint32_t mem_clock_mhz, const bool lpddr) ++{ ++	return 
lpddr ? get_lpddr_tZQOPER(mem_clock_mhz) : get_ddr_tZQOPER(mem_clock_mhz); ++} ++ ++uint32_t get_tZQCS(const uint32_t mem_clock_mhz, const bool lpddr) ++{ ++	return DIV_ROUND_UP(get_tZQOPER(mem_clock_mhz, lpddr), 4); ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index aa86b9aa39..cd1f2eb2a5 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -155,6 +155,12 @@ struct sysinfo { + 	uint8_t cke_cmd_pi_code[NUM_CHANNELS][NUM_GROUPS]; + 	uint8_t cmd_north_pi_code[NUM_CHANNELS][NUM_GROUPS]; + 	uint8_t cmd_south_pi_code[NUM_CHANNELS][NUM_GROUPS]; ++ ++	union tc_bank_reg tc_bank[NUM_CHANNELS]; ++	union tc_bank_rank_a_reg tc_bankrank_a[NUM_CHANNELS]; ++	union tc_bank_rank_b_reg tc_bankrank_b[NUM_CHANNELS]; ++	union tc_bank_rank_c_reg tc_bankrank_c[NUM_CHANNELS]; ++	union tc_bank_rank_d_reg tc_bankrank_d[NUM_CHANNELS]; + }; +  + static inline bool is_hsw_ult(void) +@@ -200,6 +206,14 @@ enum raminit_status configure_mc(struct sysinfo *ctrl); + void configure_timings(struct sysinfo *ctrl); + void configure_refresh(struct sysinfo *ctrl); +  ++uint32_t get_tCKE(uint32_t mem_clock_mhz, bool lpddr); ++uint32_t get_tXPDLL(uint32_t mem_clock_mhz); ++uint32_t get_tAONPD(uint32_t mem_clock_mhz); ++uint32_t get_tMOD(uint32_t mem_clock_mhz); ++uint32_t get_tXS_offset(uint32_t mem_clock_mhz); ++uint32_t get_tZQOPER(uint32_t mem_clock_mhz, bool lpddr); ++uint32_t get_tZQCS(uint32_t mem_clock_mhz, bool lpddr); ++ + enum raminit_status wait_for_first_rcomp(void); +  + uint8_t get_rx_bias(const struct sysinfo *ctrl); +diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +index d11cda4b3d..70487e1640 100644 +--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h ++++ 
b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +@@ -335,6 +335,99 @@ union mcscheds_cbit_reg { + 	uint32_t raw; + }; +  ++union tc_bank_reg { ++	struct __packed { ++		uint32_t tRCD      : 5; // Bits  4:0 ++		uint32_t tRP       : 5; // Bits  9:5 ++		uint32_t tRAS      : 6; // Bits 15:10 ++		uint32_t tRDPRE    : 4; // Bits 19:16 ++		uint32_t tWRPRE    : 6; // Bits 25:20 ++		uint32_t tRRD      : 4; // Bits 29:26 ++		uint32_t tRPab_ext : 2; // Bits 31:30 ++	}; ++	uint32_t raw; ++}; ++ ++union tc_bank_rank_a_reg { ++	struct __packed { ++		uint32_t tCKE        : 4; // Bits  3:0 ++		uint32_t tFAW        : 8; // Bits 11:4 ++		uint32_t tRDRD_sr    : 3; // Bits 14:12 ++		uint32_t tRDRD_dr    : 4; // Bits 18:15 ++		uint32_t tRDRD_dd    : 4; // Bits 22:19 ++		uint32_t tRDPDEN     : 5; // Bits 27:23 ++		uint32_t             : 1; // Bits 28:28 ++		uint32_t cmd_3st_dis : 1; // Bits 29:29 ++		uint32_t cmd_stretch : 2; // Bits 31:30 ++	}; ++	uint32_t raw; ++}; ++ ++union tc_bank_rank_b_reg { ++	struct __packed { ++		uint32_t tWRRD_sr : 6; // Bits  5:0 ++		uint32_t tWRRD_dr : 4; // Bits  9:6 ++		uint32_t tWRRD_dd : 4; // Bits 13:10 ++		uint32_t tWRWR_sr : 3; // Bits 16:14 ++		uint32_t tWRWR_dr : 4; // Bits 20:17 ++		uint32_t tWRWR_dd : 4; // Bits 24:21 ++		uint32_t tWRPDEN  : 6; // Bits 30:25 ++		uint32_t dec_wrd  : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union tc_bank_rank_c_reg { ++	struct __packed { ++		uint32_t tXPDLL   : 6; // Bits  5:0 ++		uint32_t tXP      : 4; // Bits  9:6 ++		uint32_t tAONPD   : 4; // Bits 13:10 ++		uint32_t tRDWR_sr : 5; // Bits 18:14 ++		uint32_t tRDWR_dr : 5; // Bits 23:19 ++		uint32_t tRDWR_dd : 5; // Bits 28:24 ++		uint32_t          : 3; // Bits 31:29 ++	}; ++	uint32_t raw; ++}; ++ ++/* NOTE: Non-ULT only implements the lower 21 bits (odt_write_delay is 2 bits) */ ++union tc_bank_rank_d_reg { ++	struct __packed { ++		uint32_t tAA                : 5; // Bits  4:0 ++		uint32_t tCWL               : 5; // Bits  9:5 ++		uint32_t 
tCPDED             : 2; // Bits 11:10 ++		uint32_t tPRPDEN            : 2; // Bits 13:12 ++		uint32_t odt_read_delay     : 3; // Bits 16:14 ++		uint32_t odt_read_duration  : 2; // Bits 18:17 ++		uint32_t odt_write_duration : 3; // Bits 21:19 ++		uint32_t odt_write_delay    : 3; // Bits 24:22 ++		uint32_t odt_always_rank_0  : 1; // Bits 25:25 ++		uint32_t cmd_delay          : 2; // Bits 27:26 ++		uint32_t                    : 4; // Bits 31:28 ++	}; ++	uint32_t raw; ++}; ++ ++union tc_rftp_reg { ++	struct __packed { ++		uint32_t tREFI   : 16; // Bits 15:0 ++		uint32_t tRFC    :  9; // Bits 24:16 ++		uint32_t tREFIx9 :  7; // Bits 31:25 ++	}; ++	uint32_t raw; ++}; ++ ++union tc_srftp_reg { ++	struct __packed { ++		uint32_t tXSDLL     : 12; // Bits 11:0 ++		uint32_t tXS_offset :  4; // Bits 15:12 ++		uint32_t tZQOPER    : 10; // Bits 25:16 ++		uint32_t            :  2; // Bits 27:26 ++		uint32_t tMOD       :  4; // Bits 31:28 ++	}; ++	uint32_t raw; ++}; ++ + union mcmain_command_rate_limit_reg { + 	struct __packed { + 		uint32_t enable_cmd_limit :  1; // Bits  0:0 +diff --git a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c +index a9d960f31b..20a05b359b 100644 +--- a/src/northbridge/intel/haswell/native_raminit/timings_refresh.c ++++ b/src/northbridge/intel/haswell/native_raminit/timings_refresh.c +@@ -1,13 +1,242 @@ + /* SPDX-License-Identifier: GPL-2.0-or-later */ +  ++#include <assert.h> ++#include <commonlib/clamp.h> ++#include <console/console.h> ++#include <delay.h> ++#include <device/pci_ops.h> ++#include <northbridge/intel/haswell/haswell.h> ++ + #include "raminit_native.h" +  ++#define BL		8	/* Burst length */ ++#define tCCD		4 ++#define tRPRE		1 ++#define tWPRE		1 ++#define tDLLK		512 ++ ++static bool is_sodimm(const enum spd_dimm_type_ddr3 type) ++{ ++	return type == SPD_DDR3_DIMM_TYPE_SO_DIMM || type == SPD_DDR3_DIMM_TYPE_72B_SO_UDIMM; ++} ++ ++static uint8_t 
get_odt_stretch(const struct sysinfo *const ctrl) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		/* Only stretch with 2 DIMMs per channel */ ++		if (ctrl->dpc[channel] != 2) ++			continue; ++ ++		const struct raminit_dimm_info *dimms = ctrl->dimms[channel]; ++ ++		/* Only stretch when using SO-DIMMs */ ++		if (!is_sodimm(dimms[0].data.dimm_type) || !is_sodimm(dimms[1].data.dimm_type)) ++			continue; ++ ++		/* Only stretch with mismatched card types */ ++		if (dimms[0].data.reference_card == dimms[1].data.reference_card) ++			continue; ++ ++		/* Stretch if one SO-DIMM is card F */ ++		for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) { ++			if (dimms[slot].data.reference_card == 5) ++				return 1; ++		} ++	} ++	return 0; ++} ++ ++static union tc_bank_reg make_tc_bank(struct sysinfo *const ctrl) ++{ ++	return (union tc_bank_reg) { ++		.tRCD      = ctrl->tRCD, ++		.tRP       = ctrl->tRP, ++		.tRAS      = ctrl->tRAS, ++		.tRDPRE    = ctrl->tRTP, ++		.tWRPRE    = 4 + ctrl->tCWL + ctrl->tWR, ++		.tRRD      = ctrl->tRRD, ++		.tRPab_ext = 0,	/** TODO: For LPDDR, this is ctrl->tRPab - ctrl->tRP **/ ++	}; ++} ++ ++static union tc_bank_rank_a_reg make_tc_bankrank_a(struct sysinfo *ctrl, uint8_t odt_stretch) ++{ ++	/* Use 3N mode for DDR during training, but always use 1N mode for LPDDR */ ++	const uint32_t tCMD = ctrl->lpddr ? 
0 : 3; ++	const uint32_t tRDRD_drdd = BL / 2 + 1 + tRPRE + odt_stretch + !!ctrl->lpddr; ++ ++	return (union tc_bank_rank_a_reg) { ++		.tCKE        = get_tCKE(ctrl->mem_clock_mhz, ctrl->lpddr), ++		.tFAW        = ctrl->tFAW, ++		.tRDRD_sr    = tCCD, ++		.tRDRD_dr    = tRDRD_drdd, ++		.tRDRD_dd    = tRDRD_drdd, ++		.tRDPDEN     = ctrl->tAA + BL / 2 + 1, ++		.cmd_3st_dis = 1,	/* Disable command tri-state before training */ ++		.cmd_stretch = tCMD, ++	}; ++} ++ ++static union tc_bank_rank_b_reg make_tc_bankrank_b(struct sysinfo *const ctrl) ++{ ++	const uint8_t tWRRD_drdd = ctrl->tCWL - ctrl->tAA + BL / 2 + 2 + tRPRE; ++	const uint8_t tWRWR_drdd = BL / 2 + 2 + tWPRE; ++ ++	return (union tc_bank_rank_b_reg) { ++		.tWRRD_sr = tCCD + ctrl->tCWL + ctrl->tWTR + 2, ++		.tWRRD_dr = ctrl->lpddr ? 8 : tWRRD_drdd, ++		.tWRRD_dd = ctrl->lpddr ? 8 : tWRRD_drdd, ++		.tWRWR_sr = tCCD, ++		.tWRWR_dr = tWRWR_drdd, ++		.tWRWR_dd = tWRWR_drdd, ++		.tWRPDEN  = ctrl->tWR + ctrl->tCWL + BL / 2, ++		.dec_wrd  = ctrl->tCWL >= 6, ++	}; ++} ++ ++static uint32_t get_tRDWR_sr(const struct sysinfo *ctrl) ++{ ++	if (ctrl->lpddr) { ++		const uint32_t tdqsck_max = DIV_ROUND_UP(5500, ctrl->qclkps * 2); ++		return ctrl->tAA - ctrl->tCWL + tCCD + tWPRE + tdqsck_max + 1; ++	} else { ++		const bool fast_clock = ctrl->mem_clock_mhz > 666; ++		return ctrl->tAA - ctrl->tCWL + tCCD + tWPRE + 2 + fast_clock; ++	} ++} ++ ++static union tc_bank_rank_c_reg make_tc_bankrank_c(struct sysinfo *ctrl, uint8_t odt_stretch) ++{ ++	const uint32_t tRDWR_sr = get_tRDWR_sr(ctrl); ++	const uint32_t tRDWR_drdd = tRDWR_sr + odt_stretch; ++ ++	return (union tc_bank_rank_c_reg) { ++		.tXPDLL   = get_tXPDLL(ctrl->mem_clock_mhz), ++		.tXP      = MAX(ctrl->tXP, 7),	/* Use a higher tXP for training */ ++		.tAONPD   = get_tAONPD(ctrl->mem_clock_mhz), ++		.tRDWR_sr = tRDWR_sr, ++		.tRDWR_dr = tRDWR_drdd, ++		.tRDWR_dd = tRDWR_drdd, ++	}; ++} ++ ++static union tc_bank_rank_d_reg make_tc_bankrank_d(struct sysinfo *ctrl, uint8_t 
odt_stretch) ++{ ++	const uint32_t odt_rd_delay = ctrl->tAA - ctrl->tCWL; ++	if (!ctrl->lpddr) { ++		return (union tc_bank_rank_d_reg) { ++			.tAA               = ctrl->tAA, ++			.tCWL              = ctrl->tCWL, ++			.tCPDED            = 1, ++			.tPRPDEN           = 1, ++			.odt_read_delay    = odt_rd_delay, ++			.odt_read_duration = odt_stretch, ++		}; ++	} ++ ++	/* tCWL has 1 extra clock because of tDQSS, subtract it here */ ++	const uint32_t tCWL_lpddr = ctrl->tCWL - 1; ++	const uint32_t odt_wr_delay = tCWL_lpddr + DIV_ROUND_UP(3500, ctrl->qclkps * 2); ++	const uint32_t odt_wr_duration = DIV_ROUND_UP(3500 - 1750, ctrl->qclkps * 2) + 1; ++ ++	return (union tc_bank_rank_d_reg) { ++		.tAA                = ctrl->tAA, ++		.tCWL               = tCWL_lpddr, ++		.tCPDED             = 2,	/* Required by JEDEC LPDDR3 spec */ ++		.tPRPDEN            = 1, ++		.odt_read_delay     = odt_rd_delay, ++		.odt_read_duration  = odt_stretch, ++		.odt_write_delay    = odt_wr_delay, ++		.odt_write_duration = odt_wr_duration, ++		.odt_always_rank_0  = ctrl->lpddr_dram_odt ++	}; ++} ++ ++/* ZQCS period values, in (tREFI * 128) units */ ++#define ZQCS_PERIOD_DDR3	128	/* tREFI * 128 = 7.8 us * 128 = 1ms */ ++#define ZQCS_PERIOD_LPDDR3	256	/* tREFI * 128 = 3.9 us * 128 = 0.5ms */ ++ ++static uint32_t make_tc_zqcal(const struct sysinfo *const ctrl) ++{ ++	const uint32_t zqcs_period = ctrl->lpddr ? ZQCS_PERIOD_LPDDR3 : ZQCS_PERIOD_DDR3; ++	const uint32_t tZQCS = get_tZQCS(ctrl->mem_clock_mhz, ctrl->lpddr); ++	return tZQCS << (is_hsw_ult() ? 10 : 8) | zqcs_period; ++} ++ ++static union tc_rftp_reg make_tc_rftp(const struct sysinfo *const ctrl) ++{ ++	/* ++	 * The tREFIx9 field should be programmed to minimum of 8.9 * tREFI (to allow ++	 * for possible delays from ZQ or isoc) and tRASmax (70us) divided by 1024. 
++	 */ ++	return (union tc_rftp_reg) { ++		.tREFI   = ctrl->tREFI, ++		.tRFC    = ctrl->tRFC, ++		.tREFIx9 = ctrl->tREFI * 89 / 10240, ++	}; ++} ++ ++static union tc_srftp_reg make_tc_srftp(const struct sysinfo *const ctrl) ++{ ++	return (union tc_srftp_reg) { ++		.tXSDLL     = tDLLK, ++		.tXS_offset = get_tXS_offset(ctrl->mem_clock_mhz), ++		.tZQOPER    = get_tZQOPER(ctrl->mem_clock_mhz, ctrl->lpddr), ++		.tMOD       = get_tMOD(ctrl->mem_clock_mhz) - 8, ++	}; ++} ++ + void configure_timings(struct sysinfo *ctrl) + { +-	/** TODO: Stub **/ ++	if (ctrl->lpddr) ++		die("%s: Missing support for LPDDR\n", __func__); ++ ++	const uint8_t odt_stretch = get_odt_stretch(ctrl); ++	const union tc_bank_reg tc_bank = make_tc_bank(ctrl); ++	const union tc_bank_rank_a_reg tc_bank_rank_a = make_tc_bankrank_a(ctrl, odt_stretch); ++	const union tc_bank_rank_b_reg tc_bank_rank_b = make_tc_bankrank_b(ctrl); ++	const union tc_bank_rank_c_reg tc_bank_rank_c = make_tc_bankrank_c(ctrl, odt_stretch); ++	const union tc_bank_rank_d_reg tc_bank_rank_d = make_tc_bankrank_d(ctrl, odt_stretch); ++ ++	const uint8_t wr_delay = tc_bank_rank_b.dec_wrd + 1; ++	uint8_t sc_wr_add_delay = 0; ++	sc_wr_add_delay |= wr_delay << 0; ++	sc_wr_add_delay |= wr_delay << 2; ++	sc_wr_add_delay |= wr_delay << 4; ++	sc_wr_add_delay |= wr_delay << 6; ++ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		ctrl->tc_bank[channel] = tc_bank; ++		ctrl->tc_bankrank_a[channel] = tc_bank_rank_a; ++		ctrl->tc_bankrank_b[channel] = tc_bank_rank_b; ++		ctrl->tc_bankrank_c[channel] = tc_bank_rank_c; ++		ctrl->tc_bankrank_d[channel] = tc_bank_rank_d; ++ ++		mchbar_write32(TC_BANK_ch(channel), ctrl->tc_bank[channel].raw); ++		mchbar_write32(TC_BANK_RANK_A_ch(channel), ctrl->tc_bankrank_a[channel].raw); ++		mchbar_write32(TC_BANK_RANK_B_ch(channel), ctrl->tc_bankrank_b[channel].raw); ++		mchbar_write32(TC_BANK_RANK_C_ch(channel), ctrl->tc_bankrank_c[channel].raw); ++		
mchbar_write32(TC_BANK_RANK_D_ch(channel), ctrl->tc_bankrank_d[channel].raw); ++		mchbar_write8(SC_WR_ADD_DELAY_ch(channel), sc_wr_add_delay); ++	} + } +  + void configure_refresh(struct sysinfo *ctrl) + { +-	/** TODO: Stub **/ ++	const union tc_srftp_reg tc_srftp = make_tc_srftp(ctrl); ++	const union tc_rftp_reg  tc_rftp  = make_tc_rftp(ctrl); ++	const uint32_t tc_zqcal = make_tc_zqcal(ctrl); ++ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		mchbar_setbits32(TC_RFP_ch(channel), 0xff); ++		mchbar_write32(TC_RFTP_ch(channel),  tc_rftp.raw); ++		mchbar_write32(TC_SRFTP_ch(channel), tc_srftp.raw); ++		mchbar_write32(TC_ZQCAL_ch(channel), tc_zqcal); ++	} + } +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 4c3f399b5d..2acc5cbbc8 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -86,9 +86,21 @@ + #define DDR_COMP_VSSHI_CONTROL			0x3a24 +  + /* MCMAIN per-channel */ ++#define TC_BANK_ch(ch)				_MCMAIN_C(0x4000, ch) ++#define TC_BANK_RANK_A_ch(ch)			_MCMAIN_C(0x4004, ch) ++#define TC_BANK_RANK_B_ch(ch)			_MCMAIN_C(0x4008, ch) ++#define TC_BANK_RANK_C_ch(ch)			_MCMAIN_C(0x400c, ch) + #define COMMAND_RATE_LIMIT_ch(ch)		_MCMAIN_C(0x4010, ch) ++#define TC_BANK_RANK_D_ch(ch)			_MCMAIN_C(0x4014, ch) ++#define SC_ROUNDT_LAT_ch(ch)			_MCMAIN_C(0x4024, ch) +  ++#define SC_WR_ADD_DELAY_ch(ch)			_MCMAIN_C(0x40d0, ch) ++ ++#define TC_ZQCAL_ch(ch)				_MCMAIN_C(0x4290, ch) ++#define TC_RFP_ch(ch)				_MCMAIN_C(0x4294, ch) ++#define TC_RFTP_ch(ch)				_MCMAIN_C(0x4298, ch) + #define MC_INIT_STATE_ch(ch)			_MCMAIN_C(0x42a0, ch) ++#define TC_SRFTP_ch(ch)				_MCMAIN_C(0x42a4, ch) +  + /* MCMAIN broadcast */ + #define MCSCHEDS_CBIT		0x4c20 +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch 
b/config/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch new file mode 100644 index 00000000..ad8527b2 --- /dev/null +++ b/config/coreboot/haswell/patches/0015-haswell-NRI-Program-memory-map.patch @@ -0,0 +1,263 @@ +From 89ff35083af68d1b24c1633886202ecc153af67d Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 21:24:50 +0200 +Subject: [PATCH 15/26] haswell NRI: Program memory map + +This is very similar to Sandy/Ivy Bridge, except that there's several +registers to program in GDXCBAR. One of these GDXCBAR registers has a +lock bit that must be set in order for the memory controller to allow +normal access to DRAM. And it took me four months to realize this one +bit was the only reason why native raminit did not work. + +Change-Id: I3af73a018a7ba948701a542e661e7fefd57591fe +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../intel/haswell/native_raminit/memory_map.c | 183 ++++++++++++++++++ + .../haswell/native_raminit/raminit_main.c     |   1 + + .../haswell/native_raminit/raminit_native.h   |   1 + + .../intel/haswell/registers/host_bridge.h     |   2 + + 5 files changed, 188 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/memory_map.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index fc55277a65..37d527e972 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -4,6 +4,7 @@ romstage-y += configure_mc.c + romstage-y += lookup_timings.c + romstage-y += init_mpll.c + romstage-y += io_comp_control.c ++romstage-y += memory_map.c + romstage-y += raminit_main.c + romstage-y += raminit_native.c + romstage-y += spd_bitmunching.c +diff --git a/src/northbridge/intel/haswell/native_raminit/memory_map.c 
b/src/northbridge/intel/haswell/native_raminit/memory_map.c +new file mode 100644 +index 0000000000..e3aded2b37 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/memory_map.c +@@ -0,0 +1,183 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <device/pci_ops.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <southbridge/intel/lynxpoint/me.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++/* GDXCBAR */ ++#define MPCOHTRK_GDXC_MOT_ADDRESS_LO		0x10 ++#define MPCOHTRK_GDXC_MOT_ADDRESS_HI		0x14 ++#define MPCOHTRK_GDXC_MOT_REGION		0x18 ++ ++#define MPCOHTRK_GDXC_OCLA_ADDRESS_LO		0x20 ++#define MPCOHTRK_GDXC_OCLA_ADDRESS_HI		0x24 ++#define MPCOHTRK_GDXC_OCLA_REGION		0x28 ++ ++/* This lock bit made me lose what little sanity I had left. - Angel Pons */ ++#define MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK	BIT(2) ++ ++static inline uint32_t gdxcbar_read32(const uintptr_t offset) ++{ ++	return read32p((mchbar_read32(GDXCBAR) & ~1) + offset); ++} ++ ++static inline void gdxcbar_write32(const uintptr_t offset, const uint32_t value) ++{ ++	write32p((mchbar_read32(GDXCBAR) & ~1) + offset, value); ++} ++ ++static inline void gdxcbar_clrsetbits32(const uintptr_t offset, uint32_t clear, uint32_t set) ++{ ++	const uintptr_t address = (mchbar_read32(GDXCBAR) & ~1) + offset; ++	clrsetbits32((void *)address, clear, set); ++} ++ ++#define gdxcbar_setbits32(offset, set)		gdxcbar_clrsetbits32(offset, 0, set) ++#define gdxcbar_clrbits32(offset, clear)	gdxcbar_clrsetbits32(offset, clear, 0) ++ ++/* All values stored in here (except the bool) are specified in MiB */ ++struct memory_map_data { ++	uint32_t dpr_size; ++	uint32_t tseg_size; ++	uint32_t gtt_size; ++	uint32_t gms_size; ++	uint32_t me_stolen_size; ++	uint32_t mmio_size; ++	uint32_t touud; ++	uint32_t remaplimit; ++	uint32_t remapbase; ++	uint32_t tom; ++	uint32_t tom_minus_me; ++	uint32_t tolud; ++	uint32_t bdsm_base; ++	uint32_t gtt_base; ++	uint32_t tseg_base; ++	bool 
reclaim_possible; ++}; ++ ++static void compute_memory_map(struct memory_map_data *map) ++{ ++	map->tom_minus_me = map->tom - map->me_stolen_size; ++ ++	/* ++	 * MMIO size will actually be slightly smaller than computed, ++	 * but matches what MRC does and is more MTRR-friendly given ++	 * that TSEG is treated as WB, but SMRR makes TSEG UC anyway. ++	 */ ++	const uint32_t mmio_size = MIN(map->tom_minus_me, 4096) / 2; ++	map->gtt_base = ALIGN_DOWN(mmio_size, map->tseg_size); ++	map->tseg_base = map->gtt_base - map->tseg_size; ++	map->bdsm_base = map->gtt_base + map->gtt_size; ++	map->tolud = map->bdsm_base + map->gms_size; ++	map->reclaim_possible = map->tom_minus_me > map->tolud; ++ ++	if (map->reclaim_possible) { ++		map->remapbase  = MAX(4096, map->tom_minus_me); ++		map->touud      = MIN(4096, map->tom_minus_me) + map->remapbase - map->tolud; ++		map->remaplimit = map->touud - 1; ++	} else { ++		map->remapbase  = 0; ++		map->remaplimit = 0; ++		map->touud = map->tom_minus_me; ++	} ++} ++ ++static void display_memory_map(const struct memory_map_data *map) ++{ ++	if (!CONFIG(DEBUG_RAM_SETUP)) ++		return; ++ ++	printk(BIOS_DEBUG, "============ MEMORY MAP ============\n"); ++	printk(BIOS_DEBUG, "\n"); ++	printk(BIOS_DEBUG, "dpr_size       = %u MiB\n", map->dpr_size); ++	printk(BIOS_DEBUG, "tseg_size      = %u MiB\n", map->tseg_size); ++	printk(BIOS_DEBUG, "gtt_size       = %u MiB\n", map->gtt_size); ++	printk(BIOS_DEBUG, "gms_size       = %u MiB\n", map->gms_size); ++	printk(BIOS_DEBUG, "me_stolen_size = %u MiB\n", map->me_stolen_size); ++	printk(BIOS_DEBUG, "\n"); ++	printk(BIOS_DEBUG, "touud          = %u MiB\n", map->touud); ++	printk(BIOS_DEBUG, "remaplimit     = %u MiB\n", map->remaplimit); ++	printk(BIOS_DEBUG, "remapbase      = %u MiB\n", map->remapbase); ++	printk(BIOS_DEBUG, "tom            = %u MiB\n", map->tom); ++	printk(BIOS_DEBUG, "tom_minus_me   = %u MiB\n", map->tom_minus_me); ++	printk(BIOS_DEBUG, "tolud          = %u MiB\n", map->tolud); ++	
printk(BIOS_DEBUG, "bdsm_base      = %u MiB\n", map->bdsm_base); ++	printk(BIOS_DEBUG, "gtt_base       = %u MiB\n", map->gtt_base); ++	printk(BIOS_DEBUG, "tseg_base      = %u MiB\n", map->tseg_base); ++	printk(BIOS_DEBUG, "\n"); ++	printk(BIOS_DEBUG, "reclaim_possible = %s\n", map->reclaim_possible ? "Yes" : "No"); ++} ++ ++static void map_write_reg64(const uint16_t reg, const uint64_t size) ++{ ++	const uint64_t value = size << 20; ++	pci_write_config32(HOST_BRIDGE, reg + 4, value >> 32); ++	pci_write_config32(HOST_BRIDGE, reg + 0, value >>  0); ++} ++ ++static void map_write_reg32(const uint16_t reg, const uint32_t size) ++{ ++	const uint32_t value = size << 20; ++	pci_write_config32(HOST_BRIDGE, reg, value); ++} ++ ++static void program_memory_map(const struct memory_map_data *map) ++{ ++	map_write_reg64(TOUUD, map->touud); ++	map_write_reg64(TOM,   map->tom); ++	if (map->reclaim_possible) { ++		map_write_reg64(REMAPBASE,  map->remapbase); ++		map_write_reg64(REMAPLIMIT, map->remaplimit); ++	} ++	if (map->me_stolen_size) { ++		map_write_reg64(MESEG_LIMIT, 0x80000 - map->me_stolen_size); ++		map_write_reg64(MESEG_BASE, map->tom_minus_me); ++		pci_or_config32(HOST_BRIDGE, MESEG_LIMIT, ME_STLEN_EN); ++	} ++	map_write_reg32(TOLUD, map->tolud); ++	map_write_reg32(BDSM,  map->bdsm_base); ++	map_write_reg32(BGSM,  map->gtt_base); ++	map_write_reg32(TSEG,  map->tseg_base); ++ ++	const uint32_t dpr_reg = map->tseg_base << 20 | map->dpr_size << 4; ++	pci_write_config32(HOST_BRIDGE, DPR, dpr_reg); ++ ++	const uint16_t gfx_stolen_size = GGC_IGD_MEM_IN_32MB_UNITS(map->gms_size / 32); ++	const uint16_t ggc = map->gtt_size << 8 | gfx_stolen_size; ++	pci_write_config16(HOST_BRIDGE, GGC, ggc); ++ ++	/** TODO: Do not hardcode these? GDXC has weird alignment requirements, though. 
**/ ++	gdxcbar_write32(MPCOHTRK_GDXC_MOT_ADDRESS_LO, 0); ++	gdxcbar_write32(MPCOHTRK_GDXC_MOT_ADDRESS_HI, 0); ++	gdxcbar_write32(MPCOHTRK_GDXC_MOT_REGION, 0); ++ ++	gdxcbar_write32(MPCOHTRK_GDXC_OCLA_ADDRESS_LO, 0); ++	gdxcbar_write32(MPCOHTRK_GDXC_OCLA_ADDRESS_HI, 0); ++	gdxcbar_write32(MPCOHTRK_GDXC_OCLA_REGION, 0); ++ ++	gdxcbar_setbits32(MPCOHTRK_GDXC_OCLA_ADDRESS_HI, MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK); ++} ++ ++enum raminit_status configure_memory_map(struct sysinfo *ctrl) ++{ ++	struct memory_map_data memory_map = { ++		.tom            = ctrl->channel_size_mb[0] + ctrl->channel_size_mb[1], ++		.dpr_size       = CONFIG_INTEL_TXT_DPR_SIZE, ++		.tseg_size      = CONFIG_SMM_TSEG_SIZE >> 20, ++		.me_stolen_size = intel_early_me_uma_size(), ++	}; ++	/** FIXME: MRC hardcodes iGPU parameters, but we should not **/ ++	const bool igpu_on = pci_read_config32(HOST_BRIDGE, DEVEN) & DEVEN_D2EN; ++	if (CONFIG(ONBOARD_VGA_IS_PRIMARY) || igpu_on) { ++		memory_map.gtt_size = 2; ++		memory_map.gms_size = 64; ++		pci_or_config32(HOST_BRIDGE, DEVEN, DEVEN_D2EN); ++	} ++	compute_memory_map(&memory_map); ++	display_memory_map(&memory_map); ++	program_memory_map(&memory_map); ++	return 0; ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 3a773cfa19..136a8ba989 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -24,6 +24,7 @@ static const struct task_entry cold_boot[] = { + 	{ initialise_mpll,                                        true, "INITMPLL",   }, + 	{ convert_timings,                                        true, "CONVTIM",    }, + 	{ configure_mc,                                           true, "CONFMC",     }, ++	{ configure_memory_map,                                   true, "MEMMAP",     }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +diff --git 
a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index cd1f2eb2a5..4763b25e8d 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -202,6 +202,7 @@ enum raminit_status collect_spd_info(struct sysinfo *ctrl); + enum raminit_status initialise_mpll(struct sysinfo *ctrl); + enum raminit_status convert_timings(struct sysinfo *ctrl); + enum raminit_status configure_mc(struct sysinfo *ctrl); ++enum raminit_status configure_memory_map(struct sysinfo *ctrl); +  + void configure_timings(struct sysinfo *ctrl); + void configure_refresh(struct sysinfo *ctrl); +diff --git a/src/northbridge/intel/haswell/registers/host_bridge.h b/src/northbridge/intel/haswell/registers/host_bridge.h +index 1ee0ab2890..0228cf6bb9 100644 +--- a/src/northbridge/intel/haswell/registers/host_bridge.h ++++ b/src/northbridge/intel/haswell/registers/host_bridge.h +@@ -34,6 +34,8 @@ +  + #define MESEG_BASE	0x70	/* Management Engine Base */ + #define MESEG_LIMIT	0x78	/* Management Engine Limit */ ++#define  MELCK		(1 << 10)	/* ME Range Lock */ ++#define  ME_STLEN_EN	(1 << 11)	/* ME Stolen Memory Enable */ +  + #define PAM0		0x80 + #define PAM1		0x81 +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch b/config/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch new file mode 100644 index 00000000..c321d239 --- /dev/null +++ b/config/coreboot/haswell/patches/0016-haswell-NRI-Add-DDR3-JEDEC-reset-and-init.patch @@ -0,0 +1,1038 @@ +From d24def01ec15f41a48331ef1e236270b2df90b84 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 21:49:40 +0200 +Subject: [PATCH 16/26] haswell NRI: Add DDR3 JEDEC reset and init + +Implement JEDEC reset and init sequence for DDR3. 
The MRS commands are +issued through the REUT (Robust Electrical Unified Testing) hardware. + +Change-Id: I2a0c066537021b587599228086727cb1e041bff5 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   3 + + .../intel/haswell/native_raminit/ddr3.c       | 217 ++++++++++++++++++ + .../haswell/native_raminit/io_comp_control.c  |  19 ++ + .../haswell/native_raminit/jedec_reset.c      | 120 ++++++++++ + .../haswell/native_raminit/raminit_main.c     |   2 + + .../haswell/native_raminit/raminit_native.h   | 101 ++++++++ + .../haswell/native_raminit/reg_structs.h      | 154 +++++++++++++ + .../intel/haswell/native_raminit/reut.c       | 196 ++++++++++++++++ + .../intel/haswell/registers/mchbar.h          |  21 ++ + src/southbridge/intel/lynxpoint/pch.h         |   2 + + 10 files changed, 835 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/ddr3.c + create mode 100644 src/northbridge/intel/haswell/native_raminit/jedec_reset.c + create mode 100644 src/northbridge/intel/haswell/native_raminit/reut.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index 37d527e972..e9212df9e6 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -1,11 +1,14 @@ + ## SPDX-License-Identifier: GPL-2.0-or-later +  + romstage-y += configure_mc.c ++romstage-y += ddr3.c ++romstage-y += jedec_reset.c + romstage-y += lookup_timings.c + romstage-y += init_mpll.c + romstage-y += io_comp_control.c + romstage-y += memory_map.c + romstage-y += raminit_main.c + romstage-y += raminit_native.c ++romstage-y += reut.c + romstage-y += spd_bitmunching.c + romstage-y += timings_refresh.c +diff --git a/src/northbridge/intel/haswell/native_raminit/ddr3.c b/src/northbridge/intel/haswell/native_raminit/ddr3.c +new file mode 100644 +index 0000000000..6ddb11488b +--- 
/dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/ddr3.c +@@ -0,0 +1,217 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <assert.h> ++#include <console/console.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++#define DDR3_RTTNOM(a, b, c) (((a) << 9) | ((b) << 6) | ((c) << 2)) ++ ++uint16_t encode_ddr3_rttnom(const uint32_t rttnom) ++{ ++	switch (rttnom) { ++	case 0:		return DDR3_RTTNOM(0, 0, 0);	/* RttNom is disabled */ ++	case 20:	return DDR3_RTTNOM(1, 0, 0);	/* RZQ/12 */ ++	case 30:	return DDR3_RTTNOM(1, 0, 1);	/* RZQ/8 */ ++	case 40:	return DDR3_RTTNOM(0, 1, 1);	/* RZQ/6 */ ++	case 60:	return DDR3_RTTNOM(0, 0, 1);	/* RZQ/4 */ ++	case 120:	return DDR3_RTTNOM(0, 1, 0);	/* RZQ/2 */ ++	} ++	printk(BIOS_ERR, "%s: Invalid rtt_nom value %u\n", __func__, rttnom); ++	return 0; ++} ++ ++static const uint8_t jedec_wr_t[12] = { 1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 0, 0 }; ++ ++static void ddr3_program_mr0(struct sysinfo *ctrl, const uint8_t dll_reset) ++{ ++	assert(ctrl->tWR >= 5 && ctrl->tWR <= 16); ++	assert(ctrl->tAA >= 4); ++	const uint8_t jedec_cas = ctrl->tAA - 4; ++	const union { ++		struct __packed { ++			uint16_t burst_length     : 2; // Bits  1:0 ++			uint16_t cas_latency_msb  : 1; // Bits  2:2 ++			uint16_t read_burst_type  : 1; // Bits  3:3 ++			uint16_t cas_latency_low  : 3; // Bits  6:4 ++			uint16_t test_mode        : 1; // Bits  7:7 ++			uint16_t dll_reset        : 1; // Bits  8:8 ++			uint16_t write_recovery   : 3; // Bits 11:9 ++			uint16_t precharge_pd_dll : 1; // Bits 12:12 ++			uint16_t                  : 3; // Bits 15:13 ++		}; ++		uint16_t raw; ++	} mr0reg = { ++		.burst_length     = 0, ++		.cas_latency_msb  = !!(jedec_cas & BIT(3)), ++		.read_burst_type  = 0, ++		.cas_latency_low  = jedec_cas & 0x7, ++		.dll_reset        = 1, ++		.write_recovery   = jedec_wr_t[ctrl->tWR - 5], ++		.precharge_pd_dll = 0, ++	}; ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; 
channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) { ++			if (!rank_in_ch(ctrl, slot + slot, channel)) ++				continue; ++ ++			if (!ctrl->restore_mrs) ++				ctrl->mr0[channel][slot] = mr0reg.raw; ++		} ++		reut_issue_mrs_all(ctrl, channel, 0, ctrl->mr0[channel]); ++	} ++} ++ ++void ddr3_program_mr1(struct sysinfo *ctrl, const uint8_t wl_mode, const uint8_t q_off) ++{ ++	/* ++	 * JESD79-3F (JEDEC DDR3 spec) refers to bit 0 of MR1 as 'DLL Enable'. ++	 * However, its encoding is weird, and 'DLL Disable' makes more sense. ++	 * ++	 * Moreover, bit 5 is part of ODIC (Output Driver Impedance Control), ++	 * but all encodings where MR1 bit 5 is 1 are reserved. Thus, omit it. ++	 */ ++	union { ++		struct __packed { ++			uint16_t dll_disable      : 1; // Bits  0:0 ++			uint16_t od_impedance_ctl : 1; // Bits  1:1 ++			uint16_t odt_rtt_nom_low  : 1; // Bits  2:2 ++			uint16_t additive_latency : 2; // Bits  4:3 ++			uint16_t                  : 1; // Bits  5:5 ++			uint16_t odt_rtt_nom_mid  : 1; // Bits  6:6 ++			uint16_t write_level_mode : 1; // Bits  7:7 ++			uint16_t                  : 1; // Bits  8:8 ++			uint16_t odt_rtt_nom_high : 1; // Bits  9:9 ++			uint16_t                  : 1; // Bits 10:10 ++			uint16_t t_dqs            : 1; // Bits 11:11 ++			uint16_t q_off            : 1; // Bits 12:12 ++			uint16_t                  : 3; // Bits 15:13 ++		}; ++		uint16_t raw; ++	} mr1reg = { ++		.dll_disable      = 0, ++		.od_impedance_ctl = 1,	/* RZQ/7 */ ++		.additive_latency = 0, ++		.write_level_mode = wl_mode, ++		.t_dqs            = 0, ++		.q_off            = q_off, ++	}; ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		mr1reg.raw &= ~RTTNOM_MASK; ++		mr1reg.raw |= encode_ddr3_rttnom(ctrl->dpc[channel] == 2 ? 
60 : 0); ++		for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) { ++			if (!rank_in_ch(ctrl, slot + slot, channel)) ++				continue; ++ ++			if (!ctrl->restore_mrs) ++				ctrl->mr1[channel][slot] = mr1reg.raw; ++		} ++		reut_issue_mrs_all(ctrl, channel, 1, ctrl->mr1[channel]); ++	} ++} ++ ++enum { ++	RTT_WR_OFF = 0, ++	RTT_WR_60  = 1, ++	RTT_WR_120 = 2, ++}; ++ ++static void ddr3_program_mr2(struct sysinfo *ctrl) ++{ ++	assert(ctrl->tCWL >= 5); ++	const bool dimm_srt = ctrl->flags.ext_temp_refresh && !ctrl->flags.asr; ++ ++	const union { ++		struct __packed { ++			uint16_t partial_array_sr  : 3; // Bits  2:0 ++			uint16_t cas_write_latency : 3; // Bits  5:3 ++			uint16_t auto_self_refresh : 1; // Bits  6:6 ++			uint16_t self_refresh_temp : 1; // Bits  7:7 ++			uint16_t                   : 1; // Bits  8:8 ++			uint16_t odt_rtt_wr        : 2; // Bits 10:9 ++			uint16_t                   : 5; // Bits 15:11 ++		}; ++		uint16_t raw; ++	} mr2reg = { ++		.partial_array_sr  = 0, ++		.cas_write_latency = ctrl->tCWL - 5, ++		.auto_self_refresh = ctrl->flags.asr, ++		.self_refresh_temp = dimm_srt, ++		.odt_rtt_wr        = is_hsw_ult() ? 
RTT_WR_120 : RTT_WR_60, ++	}; ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) { ++			if (!rank_in_ch(ctrl, slot + slot, channel)) ++				continue; ++ ++			if (!ctrl->restore_mrs) ++				ctrl->mr2[channel][slot] = mr2reg.raw; ++		} ++		/* MR2 shadow register is similar but not identical to MR2 */ ++		if (!ctrl->restore_mrs) { ++			union tc_mr2_shadow_reg tc_mr2_shadow = { ++				.raw = mr2reg.raw & 0x073f, ++			}; ++			for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) { ++				if (!rank_in_ch(ctrl, slot + slot, channel)) ++					continue; ++ ++				if (dimm_srt) ++					tc_mr2_shadow.srt_available |= BIT(slot); ++ ++				if (ctrl->rank_mirrored[channel] & BIT(slot + slot + 1)) ++					tc_mr2_shadow.addr_bit_swizzle |= BIT(slot); ++			} ++			mchbar_write32(TC_MR2_SHADOW_ch(channel), tc_mr2_shadow.raw); ++		} ++		reut_issue_mrs_all(ctrl, channel, 2, ctrl->mr2[channel]); ++	} ++} ++ ++static void ddr3_program_mr3(struct sysinfo *ctrl, const uint8_t mpr_mode) ++{ ++	const union { ++		struct __packed { ++			uint16_t mpr_loc  :  2; // Bits  1:0 ++			uint16_t mpr_mode :  1; // Bits  2:2 ++			uint16_t          : 13; // Bits 15:3 ++		}; ++		uint16_t raw; ++	} mr3reg = { ++		.mpr_loc  = 0, ++		.mpr_mode = mpr_mode, ++	}; ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) { ++			if (!rank_in_ch(ctrl, slot + slot, channel)) ++				continue; ++ ++			if (!ctrl->restore_mrs) ++				ctrl->mr3[channel][slot] = mr3reg.raw; ++		} ++		reut_issue_mrs_all(ctrl, channel, 3, ctrl->mr3[channel]); ++	} ++} ++ ++enum raminit_status ddr3_jedec_init(struct sysinfo *ctrl) ++{ ++	ddr3_program_mr2(ctrl); ++	ddr3_program_mr3(ctrl, 0); ++	ddr3_program_mr1(ctrl, 0, 0); ++	ddr3_program_mr0(ctrl, 1); ++	return reut_issue_zq(ctrl, ctrl->chanmap, ZQ_INIT); ++} +diff 
--git a/src/northbridge/intel/haswell/native_raminit/io_comp_control.c b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c +index 7e96c08938..ad8c848e57 100644 +--- a/src/northbridge/intel/haswell/native_raminit/io_comp_control.c ++++ b/src/northbridge/intel/haswell/native_raminit/io_comp_control.c +@@ -8,6 +8,25 @@ +  + #include "raminit_native.h" +  ++enum raminit_status io_reset(void) ++{ ++	union mc_init_state_g_reg mc_init_state_g = { ++		.raw = mchbar_read32(MC_INIT_STATE_G), ++	}; ++	mc_init_state_g.reset_io = 1; ++	mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw); ++	struct stopwatch timer; ++	stopwatch_init_msecs_expire(&timer, 2000); ++	do { ++		mc_init_state_g.raw = mchbar_read32(MC_INIT_STATE_G); ++		if (mc_init_state_g.reset_io == 0) ++			return RAMINIT_STATUS_SUCCESS; ++ ++	} while (!stopwatch_expired(&timer)); ++	printk(BIOS_ERR, "Timed out waiting for DDR I/O reset to complete\n"); ++	return RAMINIT_STATUS_POLL_TIMEOUT; ++} ++ + enum raminit_status wait_for_first_rcomp(void) + { + 	struct stopwatch timer; +diff --git a/src/northbridge/intel/haswell/native_raminit/jedec_reset.c b/src/northbridge/intel/haswell/native_raminit/jedec_reset.c +new file mode 100644 +index 0000000000..de0f676758 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/jedec_reset.c +@@ -0,0 +1,120 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <console/console.h> ++#include <delay.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <southbridge/intel/lynxpoint/pch.h> ++#include <types.h> ++#include <timer.h> ++ ++#include "raminit_native.h" ++ ++static void assert_reset(const bool do_reset) ++{ ++	if (is_hsw_ult()) { ++		uint32_t pm_cfg2 = RCBA32(PM_CFG2); ++		if (do_reset) ++			pm_cfg2 &= ~PM_CFG2_DRAM_RESET_CTL; ++		else ++			pm_cfg2 |= PM_CFG2_DRAM_RESET_CTL; ++		RCBA32(PM_CFG2) = pm_cfg2; ++	} else { ++		union mc_init_state_g_reg mc_init_state_g = { ++			.raw = mchbar_read32(MC_INIT_STATE_G), ++		}; ++		
mc_init_state_g.ddr_not_reset = !do_reset; ++		mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw); ++	} ++} ++ ++/* ++ * Perform JEDEC reset. ++ * ++ * If RTT_NOM is to be enabled in MR1, the ODT input signal must be ++ * statically held low in our system since RTT_NOM is always enabled. ++ */ ++static void jedec_reset(struct sysinfo *ctrl) ++{ ++	if (is_hsw_ult()) ++		assert_reset(false); ++ ++	union mc_init_state_g_reg mc_init_state_g = { ++		.ddr_not_reset     = 1, ++		.safe_self_refresh = 1, ++	}; ++	mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw); ++ ++	union reut_misc_cke_ctrl_reg reut_misc_cke_ctrl = { ++		.cke_override = 0xf, ++		.cke_on       = 0, ++	}; ++	mchbar_write32(REUT_MISC_CKE_CTRL, reut_misc_cke_ctrl.raw); ++ ++	assert_reset(true); ++ ++	/** TODO: check and switch DDR3 voltage here (mainboard-specific) **/ ++ ++	udelay(200); ++ ++	assert_reset(false); ++ ++	udelay(500); ++ ++	mc_init_state_g.dclk_enable = 1; ++	mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw); ++ ++	/* Delay at least 20 nanoseconds for tCKSRX */ ++	tick_delay(1); ++ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		reut_misc_cke_ctrl.cke_on = ctrl->rankmap[channel]; ++		mchbar_write32(REUT_ch_MISC_CKE_CTRL(channel), reut_misc_cke_ctrl.raw); ++	} ++ ++	/* ++	 * Wait minimum of reset CKE exit time, tXPR. ++	 * Spec says MAX(tXS, 5 tCK). 5 tCK is 10 ns. 
++	 */ ++	tick_delay(1); ++} ++ ++enum raminit_status do_jedec_init(struct sysinfo *ctrl) ++{ ++	/* Never do a JEDEC reset in S3 resume */ ++	if (ctrl->bootmode == BOOTMODE_S3) ++		return RAMINIT_STATUS_SUCCESS; ++ ++	enum raminit_status status = io_reset(); ++	if (status) ++		return status; ++ ++	status = wait_for_first_rcomp(); ++	if (status) ++		return status; ++ ++	/* Force ODT low (JEDEC spec) */ ++	const union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = { ++		.odt_override = 0xf, ++		.odt_on       = 0, ++	}; ++	mchbar_write32(REUT_MISC_ODT_CTRL, reut_misc_odt_ctrl.raw); ++ ++	/* ++	 * Note: Haswell MRC does not clear ODT override for LPDDR3. However, ++	 * Broadwell MRC does. Hell suspects this difference is important, as ++	 * there is an erratum in the specification update for Broadwell: ++	 * ++	 * Erratum BDM74: LPDDR3 Memory Training May Cause Platform Boot Failure ++	 */ ++	if (ctrl->lpddr) ++		die("%s: LPDDR-specific JEDEC init not implemented\n", __func__); ++ ++	jedec_reset(ctrl); ++	status = ddr3_jedec_init(ctrl); ++	if (!status) ++		ctrl->restore_mrs = true; ++ ++	/* Release ODT override */ ++	mchbar_write32(REUT_MISC_ODT_CTRL, 0); ++	return status; ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 136a8ba989..73ff180b8c 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -25,6 +25,7 @@ static const struct task_entry cold_boot[] = { + 	{ convert_timings,                                        true, "CONVTIM",    }, + 	{ configure_mc,                                           true, "CONFMC",     }, + 	{ configure_memory_map,                                   true, "MEMMAP",     }, ++	{ do_jedec_init,                                          true, "JEDECINIT",  }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +@@ -58,6 +59,7 @@ 
static void initialize_ctrl(struct sysinfo *ctrl) + 	ctrl->stepping = get_stepping(ctrl->cpu); + 	ctrl->vdd_mv = is_hsw_ult() ? 1350 : 1500; /** FIXME: Hardcoded, does it matter? **/ + 	ctrl->dq_pins_interleaved = cfg->dq_pins_interleaved; ++	ctrl->restore_mrs = false; + 	ctrl->bootmode = bootmode; + } +  +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index 4763b25e8d..e3cf4254a0 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -27,6 +27,30 @@ + /* Always use 12 legs for emphasis (not trained) */ + #define TXEQFULLDRV		(3 << 4) +  ++/* DDR3 mode register bits */ ++#define MR0_DLL_RESET		BIT(8) ++ ++#define MR1_WL_ENABLE		BIT(7) ++#define MR1_QOFF_ENABLE		BIT(12) /* If set, output buffers disabled */ ++ ++#define RTTNOM_MASK		(BIT(9) | BIT(6) | BIT(2)) ++ ++/* ZQ calibration types */ ++enum { ++	ZQ_INIT,	/* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init  with tZQinit  */ ++	ZQ_LONG,	/* DDR3: ZQCL with tZQoper, LPDDR3: ZQ Long  with tZQCL    */ ++	ZQ_SHORT,	/* DDR3: ZQCS with tZQCS,   LPDDR3: ZQ Short with tZQCS    */ ++	ZQ_RESET,	/* DDR3: not used,          LPDDR3: ZQ Reset with tZQreset */ ++}; ++ ++/* REUT initialisation modes */ ++enum { ++	REUT_MODE_IDLE = 0, ++	REUT_MODE_TEST = 1, ++	REUT_MODE_MRS  = 2, ++	REUT_MODE_NOP  = 3, /* Normal operation mode */ ++}; ++ + enum command_training_iteration { + 	CT_ITERATION_CLOCK = 0, + 	CT_ITERATION_CMD_NORTH, +@@ -50,6 +74,7 @@ enum raminit_status { + 	RAMINIT_STATUS_UNSUPPORTED_MEMORY, + 	RAMINIT_STATUS_MPLL_INIT_FAILURE, + 	RAMINIT_STATUS_POLL_TIMEOUT, ++	RAMINIT_STATUS_REUT_ERROR, + 	RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/ + }; +  +@@ -72,6 +97,7 @@ struct sysinfo { + 	uint32_t cpu;		/* CPUID value */ +  + 	bool dq_pins_interleaved; ++	bool restore_mrs; +  + 	/** TODO: ECC support 
untested **/ + 	bool is_ecc; +@@ -161,6 +187,11 @@ struct sysinfo { + 	union tc_bank_rank_b_reg tc_bankrank_b[NUM_CHANNELS]; + 	union tc_bank_rank_c_reg tc_bankrank_c[NUM_CHANNELS]; + 	union tc_bank_rank_d_reg tc_bankrank_d[NUM_CHANNELS]; ++ ++	uint16_t mr0[NUM_CHANNELS][NUM_SLOTRANKS]; ++	uint16_t mr1[NUM_CHANNELS][NUM_SLOTRANKS]; ++	uint16_t mr2[NUM_CHANNELS][NUM_SLOTRANKS]; ++	uint16_t mr3[NUM_CHANNELS][NUM_SLOTRANKS]; + }; +  + static inline bool is_hsw_ult(void) +@@ -196,6 +227,55 @@ static inline void clear_data_offset_train_all(struct sysinfo *ctrl) + 	memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train)); + } +  ++/* Number of ticks to wait in units of 69.841279 ns (citation needed) */ ++static inline void tick_delay(const uint32_t delay) ++{ ++	volatile uint32_t junk; ++ ++	/* Just perform reads to a random register */ ++	for (uint32_t start = 0; start <= delay; start++) ++		junk = mchbar_read32(REUT_ERR_DATA_STATUS); ++} ++ ++/* ++ * 64-bit MCHBAR registers need to be accessed atomically. If one uses ++ * two 32-bit ops instead, there will be problems with the REUT's CADB ++ * (Command Address Data Buffer): hardware automatically advances the ++ * pointer into the register file after a write to the input register. 
++ */ ++static inline uint64_t mchbar_read64(const uintptr_t x) ++{ ++	const uint64_t *offset = (uint64_t *)(CONFIG_FIXED_MCHBAR_MMIO_BASE + x); ++	uint64_t mmxsave, v; ++	asm volatile ( ++		"\n\t movq %%mm0, %0" ++		"\n\t movq %2, %%mm0" ++		"\n\t movq %%mm0, %1" ++		"\n\t movq %3, %%mm0" ++		"\n\t emms" ++		: "=m"(mmxsave), ++		  "=m"(v) ++		: "m"(offset[0]), ++		  "m"(mmxsave)); ++	return v; ++} ++ ++static inline void mchbar_write64(const uintptr_t x, const uint64_t v) ++{ ++	const uint64_t *offset = (uint64_t *)(CONFIG_FIXED_MCHBAR_MMIO_BASE + x); ++	uint64_t mmxsave; ++	asm volatile ( ++		"\n\t movq %%mm0, %0" ++		"\n\t movq %2, %%mm0" ++		"\n\t movq %%mm0, %1" ++		"\n\t movq %3, %%mm0" ++		"\n\t emms" ++		: "=m"(mmxsave) ++		: "m"(offset[0]), ++		  "m"(v), ++		  "m"(mmxsave)); ++} ++ + void raminit_main(enum raminit_boot_mode bootmode); +  + enum raminit_status collect_spd_info(struct sysinfo *ctrl); +@@ -203,6 +283,7 @@ enum raminit_status initialise_mpll(struct sysinfo *ctrl); + enum raminit_status convert_timings(struct sysinfo *ctrl); + enum raminit_status configure_mc(struct sysinfo *ctrl); + enum raminit_status configure_memory_map(struct sysinfo *ctrl); ++enum raminit_status do_jedec_init(struct sysinfo *ctrl); +  + void configure_timings(struct sysinfo *ctrl); + void configure_refresh(struct sysinfo *ctrl); +@@ -215,8 +296,28 @@ uint32_t get_tXS_offset(uint32_t mem_clock_mhz); + uint32_t get_tZQOPER(uint32_t mem_clock_mhz, bool lpddr); + uint32_t get_tZQCS(uint32_t mem_clock_mhz, bool lpddr); +  ++enum raminit_status io_reset(void); + enum raminit_status wait_for_first_rcomp(void); +  ++uint16_t encode_ddr3_rttnom(uint32_t rttnom); ++void ddr3_program_mr1(struct sysinfo *ctrl, uint8_t wl_mode, uint8_t q_off); ++enum raminit_status ddr3_jedec_init(struct sysinfo *ctrl); ++ ++void reut_issue_mrs( ++	struct sysinfo *ctrl, ++	uint8_t channel, ++	uint8_t rankmask, ++	uint8_t mr, ++	uint16_t val); ++ ++void reut_issue_mrs_all( ++	struct sysinfo *ctrl, ++	
uint8_t channel, ++	uint8_t mr, ++	const uint16_t val[NUM_SLOTS]); ++ ++enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type); ++ + uint8_t get_rx_bias(const struct sysinfo *ctrl); +  + uint8_t get_tCWL(uint32_t mem_clock_mhz); +diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +index 70487e1640..9929f617fe 100644 +--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h ++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +@@ -335,6 +335,127 @@ union mcscheds_cbit_reg { + 	uint32_t raw; + }; +  ++union reut_pat_cadb_prog_reg { ++	struct __packed { ++		uint32_t addr : 16; // Bits 15:0 ++		uint32_t      :  8; // Bits 23:16 ++		uint32_t bank :  3; // Bits 26:24 ++		uint32_t      :  5; // Bits 31:27 ++		uint32_t cs   :  4; // Bits 35:32 ++		uint32_t      :  4; // Bits 39:36 ++		uint32_t cmd  :  3; // Bits 42:40 ++		uint32_t      :  5; // Bits 47:43 ++		uint32_t odt  :  4; // Bits 51:48 ++		uint32_t      :  4; // Bits 55:52 ++		uint32_t cke  :  4; // Bits 59:56 ++		uint32_t      :  4; // Bits 63:60 ++	}; ++	uint64_t raw; ++	uint32_t raw32[2]; ++}; ++ ++union reut_pat_cadb_mrs_reg { ++	struct __packed { ++		uint32_t delay_gap : 3; // Bits  2:0 ++		uint32_t           : 5; // Bits  7:3 ++		uint32_t start_ptr : 3; // Bits 10:8 ++		uint32_t           : 5; // Bits 15:11 ++		uint32_t end_ptr   : 3; // Bits 18:16 ++		uint32_t           : 5; // Bits 23:19 ++		uint32_t curr_ptr  : 3; // Bits 26:24 ++		uint32_t           : 5; // Bits 31:27 ++	}; ++	uint32_t raw; ++}; ++ ++union reut_seq_cfg_reg { ++	struct __packed { ++		uint32_t                               :  3; // Bits  2:0 ++		uint32_t stop_base_seq_on_wrap_trigger :  1; // Bits  3:3 ++		uint32_t                               :  1; // Bits  4:4 ++		uint32_t address_update_rate_mode      :  1; // Bits  5:5 ++		uint32_t                               :  1; // Bits  6:6 ++		uint32_t 
enable_dummy_reads            :  1; // Bits  7:7 ++		uint32_t                               :  2; // Bits  9:8 ++		uint32_t enable_constant_write_strobe  :  1; // Bits 10:10 ++		uint32_t global_control                :  1; // Bits 11:11 ++		uint32_t initialization_mode           :  2; // Bits 13:12 ++		uint32_t                               :  2; // Bits 15:14 ++		uint32_t early_steppings_loop_count    :  5; // Bits 20:16   *** Not on C0 *** ++		uint32_t                               :  3; // Bits 23:21 ++		uint32_t subsequence_start_pointer     :  3; // Bits 26:24 ++		uint32_t                               :  1; // Bits 27:27 ++		uint32_t subsequence_end_pointer       :  3; // Bits 30:28 ++		uint32_t                               :  1; // Bits 31:31 ++		uint32_t start_test_delay              : 10; // Bits 41:32 ++		uint32_t                               : 22; // Bits 63:42 ++	}; ++	uint64_t raw; ++	uint32_t raw32[2]; ++}; ++ ++union reut_seq_ctl_reg { ++	struct __packed { ++		uint32_t start_test    :  1; // Bits  0:0 ++		uint32_t stop_test     :  1; // Bits  1:1 ++		uint32_t clear_errors  :  1; // Bits  2:2 ++		uint32_t               :  1; // Bits  3:3 ++		uint32_t stop_on_error :  1; // Bits  4:4 ++		uint32_t               : 27; // Bits 31:5 ++	}; ++	uint32_t raw; ++}; ++ ++union reut_global_err_reg { ++	struct __packed { ++		uint32_t ch_error     :  2; // Bits  1:0 ++		uint32_t              : 14; // Bits 15:2 ++		uint32_t ch_test_done :  2; // Bits 17:16 ++		uint32_t              : 14; // Bits 31:18 ++	}; ++	uint32_t raw; ++}; ++ ++union reut_misc_cke_ctrl_reg { ++	struct __packed { ++		uint32_t cke_override           :  4; // Bits  3:0 ++		uint32_t                        :  4; // Bits  7:4 ++		uint32_t cke_en_start_test_sync :  1; // Bits  8:8 ++		uint32_t                        :  7; // Bits 15:9 ++		uint32_t cke_on                 :  4; // Bits 19:16 ++		uint32_t                        : 12; // Bits 31:20 ++	}; ++	uint32_t raw; ++}; ++ ++union 
reut_misc_odt_ctrl_reg { ++	struct __packed { ++		uint32_t odt_override     :  4; // Bits  3:0 ++		uint32_t                  : 12; // Bits 15:4 ++		uint32_t odt_on           :  4; // Bits 19:16 ++		uint32_t                  : 11; // Bits 30:20 ++		uint32_t mpr_train_ddr_on :  1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union mcscheds_dft_misc_reg { ++	struct __packed { ++		uint32_t wdar                 :  1; // Bits  0:0 ++		uint32_t safe_mask_sel        :  3; // Bits  3:1 ++		uint32_t force_rcv_en         :  1; // Bits  4:4 ++		uint32_t                      :  3; // Bits  7:5 ++		uint32_t ddr_qualifier        :  2; // Bits  9:8 ++		uint32_t qualifier_length     :  2; // Bits 11:10 ++		uint32_t wdb_block_en         :  1; // Bits 12:12 ++		uint32_t rt_dft_read_ptr      :  4; // Bits 16:13 ++		uint32_t rt_dft_read_enable   :  1; // Bits 17:17 ++		uint32_t rt_dft_read_sel_addr :  1; // Bits 18:18 ++		uint32_t                      : 13; // Bits 31:19 ++	}; ++	uint32_t raw; ++}; ++ + union tc_bank_reg { + 	struct __packed { + 		uint32_t tRCD      : 5; // Bits  4:0 +@@ -428,6 +549,18 @@ union tc_srftp_reg { + 	uint32_t raw; + }; +  ++union tc_mr2_shadow_reg { ++	struct __packed { ++		uint32_t mr2_shadow_low   :  6; // Bits  5:0 ++		uint32_t srt_available    :  2; // Bits  7:6 ++		uint32_t mr2_shadow_high  :  3; // Bits 10:8 ++		uint32_t                  :  3; // Bits 13:11 ++		uint32_t addr_bit_swizzle :  2; // Bits 15:14 ++		uint32_t                  : 16; // Bits 31:16 ++	}; ++	uint32_t raw; ++}; ++ + union mcmain_command_rate_limit_reg { + 	struct __packed { + 		uint32_t enable_cmd_limit :  1; // Bits  0:0 +@@ -483,6 +616,27 @@ union mad_zr_reg { + 	uint32_t raw; + }; +  ++union mc_init_state_g_reg { ++	struct __packed { ++		uint32_t pu_mrc_done        :  1; // Bits  0:0 ++		uint32_t ddr_not_reset      :  1; // Bits  1:1 ++		uint32_t                    :  1; // Bits  2:2 ++		uint32_t refresh_enable     :  1; // Bits  3:3 ++		uint32_t                    :  1; // 
Bits  4:4 ++		uint32_t mc_init_done_ack   :  1; // Bits  5:5 ++		uint32_t                    :  1; // Bits  6:6 ++		uint32_t mrc_done           :  1; // Bits  7:7 ++		uint32_t safe_self_refresh  :  1; // Bits  8:8 ++		uint32_t                    :  1; // Bits  9:9 ++		uint32_t hvm_gate_ddr_reset :  1; // Bits 10:10 ++		uint32_t                    : 11; // Bits 21:11 ++		uint32_t dclk_enable        :  1; // Bits 22:22 ++		uint32_t reset_io           :  1; // Bits 23:23 ++		uint32_t                    :  8; // Bits 31:24 ++	}; ++	uint32_t raw; ++}; ++ + /* Same definition for P_COMP, M_COMP, D_COMP */ + union pcu_comp_reg { + 	struct __packed { +diff --git a/src/northbridge/intel/haswell/native_raminit/reut.c b/src/northbridge/intel/haswell/native_raminit/reut.c +new file mode 100644 +index 0000000000..c55cdd9c7e +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/reut.c +@@ -0,0 +1,196 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <console/console.h> ++#include <delay.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <timer.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++enum { ++	CADB_CMD_MRS = 0, ++	CADB_CMD_REF = 1, ++	CADB_CMD_PRE = 2, ++	CADB_CMD_ACT = 3, ++	CADB_CMD_WR  = 4, ++	CADB_CMD_RD  = 5, ++	CADB_CMD_ZQ  = 6, ++	CADB_CMD_NOP = 7, ++}; ++ ++/* ++ * DDR3 rank mirror swaps the following pins: A3<->A4, A5<->A6, A7<->A8, BA0<->BA1 ++ * ++ * Note that the swapped bits are contiguous. We can use some XOR magic to swap the bits. ++ * Address lanes are at bits 0..15 and bank selects are at bits 24..26 on the REUT register. 
++ */ ++#define MIRROR_BITS	(BIT(24) | BIT(7) | BIT(5) | BIT(3)) ++static uint64_t cadb_prog_rank_mirror(const uint64_t cadb_prog) ++{ ++	/* First XOR: find which pairs of bits are different (need swapping) */ ++	const uint64_t tmp64 = (cadb_prog ^ (cadb_prog >> 1)) & MIRROR_BITS; ++ ++	/* Second XOR: invert the pairs of bits that have different values */ ++	return cadb_prog ^ (tmp64 | tmp64 << 1); ++} ++ ++static enum raminit_status reut_write_cadb_cmd( ++	struct sysinfo *ctrl, ++	const uint8_t channel, ++	const uint8_t rankmask, ++	const uint8_t cmd, ++	const uint8_t bank, ++	const uint16_t valarr[NUM_SLOTRANKS], ++	const uint8_t delay) ++{ ++	union mcscheds_dft_misc_reg dft_misc = { ++		.raw = mchbar_read32(MCSCHEDS_DFT_MISC), ++	}; ++	dft_misc.ddr_qualifier = 0; ++	mchbar_write32(MCSCHEDS_DFT_MISC, dft_misc.raw); ++ ++	/* Pointer will be dynamically incremented after a write to CADB_PROG register */ ++	mchbar_write8(REUT_ch_PAT_CADB_WRITE_PTR(channel), 0); ++ ++	uint8_t count = 0; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (!(ctrl->rankmap[channel] & BIT(rank) & rankmask)) ++			continue; ++ ++		union reut_pat_cadb_prog_reg reut_cadb_prog = { ++			.addr = valarr[rank], ++			.bank = bank, ++			.cs   = ~BIT(rank), /* CS is active low */ ++			.cmd  = cmd, ++			.cke  = 0xf, ++		}; ++		if (ctrl->rank_mirrored[channel] & BIT(rank)) ++			reut_cadb_prog.raw = cadb_prog_rank_mirror(reut_cadb_prog.raw); ++ ++		mchbar_write64(REUT_ch_PAT_CADB_PROG(channel), reut_cadb_prog.raw); ++		count++; ++	} ++	if (!count) { ++		printk(BIOS_ERR, "%s: rankmask is invalid\n", __func__); ++		return RAMINIT_STATUS_UNSPECIFIED_ERROR;	/** FIXME: Is this needed? **/ ++	} ++	const union reut_pat_cadb_mrs_reg reut_cadb_mrs = { ++		.delay_gap = delay ? 
delay : 3, ++		.end_ptr   = count - 1, ++	}; ++	mchbar_write32(REUT_ch_PAT_CADB_MRS(channel), reut_cadb_mrs.raw); ++ ++	const uint32_t reut_seq_cfg_save = mchbar_read32(REUT_ch_SEQ_CFG(channel)); ++	union reut_seq_cfg_reg reut_seq_cfg = { ++		.raw = reut_seq_cfg_save, ++	}; ++	reut_seq_cfg.global_control = 0; ++	reut_seq_cfg.initialization_mode = REUT_MODE_MRS; ++	mchbar_write32(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw); ++	mchbar_write32(REUT_ch_SEQ_CTL(channel), (union reut_seq_ctl_reg) { ++		.start_test   = 1, ++		.clear_errors = 1, ++	}.raw); ++	enum raminit_status status = RAMINIT_STATUS_SUCCESS; ++	union reut_global_err_reg reut_global_err; ++	struct stopwatch timer; ++	stopwatch_init_msecs_expire(&timer, 100); ++	do { ++		reut_global_err.raw = mchbar_read32(REUT_GLOBAL_ERR); ++		if (reut_global_err.ch_error & BIT(channel)) { ++			printk(BIOS_ERR, "Unexpected REUT error for channel %u\n", channel); ++			status = RAMINIT_STATUS_REUT_ERROR; ++			break; ++		} ++		if (stopwatch_expired(&timer)) { ++			printk(BIOS_ERR, "%s: REUT timed out!\n", __func__); ++			status = RAMINIT_STATUS_POLL_TIMEOUT; ++			break; ++		} ++	} while (!(reut_global_err.ch_test_done & BIT(channel))); ++	mchbar_write32(REUT_ch_SEQ_CTL(channel), (union reut_seq_ctl_reg) { ++		.clear_errors = 1, ++	}.raw); ++	mchbar_write32(REUT_ch_SEQ_CFG(channel), reut_seq_cfg_save); ++	return status; ++} ++ ++static enum raminit_status reut_write_cadb_cmd_all( ++	struct sysinfo *ctrl, ++	const uint8_t channel, ++	const uint8_t rankmask, ++	const uint8_t cmd, ++	const uint8_t bank, ++	const uint16_t val, ++	const uint8_t delay) ++{ ++	const uint16_t valarr[NUM_SLOTRANKS] = { val, val, val, val }; ++	return reut_write_cadb_cmd(ctrl, channel, rankmask, cmd, bank, valarr, delay); ++} ++ ++void reut_issue_mrs( ++	struct sysinfo *ctrl, ++	const uint8_t channel, ++	const uint8_t rankmask, ++	const uint8_t mr, ++	const uint16_t val) ++{ ++	reut_write_cadb_cmd_all(ctrl, channel, rankmask, CADB_CMD_MRS, mr, val, 
0); ++} ++ ++void reut_issue_mrs_all( ++	struct sysinfo *ctrl, ++	const uint8_t channel, ++	const uint8_t mr, ++	const uint16_t val[NUM_SLOTS]) ++{ ++	const uint16_t valarr[NUM_SLOTRANKS] = { val[0], val[0], val[1], val[1] }; ++	reut_write_cadb_cmd(ctrl, channel, 0xf, CADB_CMD_MRS, mr, valarr, 0); ++} ++ ++enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type) ++{ ++	/** TODO: Issuing ZQ commands differs for LPDDR **/ ++	if (ctrl->lpddr) ++		die("%s: LPDDR not yet supported in ZQ calibration\n", __func__); ++ ++	uint8_t opcode; /* NOTE: Only used for LPDDR */ ++	uint16_t zq = 0; ++	switch (zq_type) { ++	case ZQ_INIT: ++		zq = BIT(10); ++		opcode = 0xff; ++		break; ++	case ZQ_LONG: ++		zq = BIT(10); ++		opcode = 0xab; ++		break; ++	case ZQ_SHORT: ++		opcode = 0x56; ++		break; ++	case ZQ_RESET: ++		opcode = 0xc3; ++		break; ++	default: ++		die("%s: ZQ type %u is invalid\n", __func__, zq_type); ++	} ++ ++	/* ZQCS on single-channel needs a longer delay */ ++	const uint8_t delay = zq_type == ZQ_SHORT && (!ctrl->dpc[0] || !ctrl->dpc[1]) ? 
7 : 1; ++	enum raminit_status status = RAMINIT_STATUS_SUCCESS; ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!(BIT(channel) & chanmask) || !does_ch_exist(ctrl, channel)) ++			continue; ++ ++		status = reut_write_cadb_cmd_all(ctrl, channel, 0xf, CADB_CMD_ZQ, 0, zq, delay); ++		if (status) ++			break; ++	} ++ ++	/* Wait a bit after ZQ INIT and ZQCL commands */ ++	if (zq) ++		udelay(1); ++ ++	return status; ++} +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 2acc5cbbc8..4fc78a7f43 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -96,15 +96,36 @@ +  + #define SC_WR_ADD_DELAY_ch(ch)			_MCMAIN_C(0x40d0, ch) +  ++#define REUT_ch_MISC_CKE_CTRL(ch)		_MCMAIN_C(0x4190, ch) ++ ++#define REUT_ch_PAT_CADB_MRS(ch)		_MCMAIN_C(0x419c, ch) ++ ++#define REUT_ch_PAT_CADB_WRITE_PTR(ch)		_MCMAIN_C(0x41bc, ch) ++#define REUT_ch_PAT_CADB_PROG(ch)		_MCMAIN_C(0x41c0, ch) ++ + #define TC_ZQCAL_ch(ch)				_MCMAIN_C(0x4290, ch) + #define TC_RFP_ch(ch)				_MCMAIN_C(0x4294, ch) + #define TC_RFTP_ch(ch)				_MCMAIN_C(0x4298, ch) ++#define TC_MR2_SHADOW_ch(ch)			_MCMAIN_C(0x429c, ch) + #define MC_INIT_STATE_ch(ch)			_MCMAIN_C(0x42a0, ch) + #define TC_SRFTP_ch(ch)				_MCMAIN_C(0x42a4, ch) +  ++#define REUT_GLOBAL_ERR				0x4804 ++ ++#define REUT_ch_SEQ_CFG(ch)			(0x48a8 + 8 * (ch)) ++ ++#define REUT_ch_SEQ_CTL(ch)			(0x48b8 + 4 * (ch)) ++ + /* MCMAIN broadcast */ + #define MCSCHEDS_CBIT		0x4c20 +  ++#define MCSCHEDS_DFT_MISC	0x4c30 ++ ++#define REUT_ERR_DATA_STATUS	0x4ce0 ++ ++#define REUT_MISC_CKE_CTRL	0x4d90 ++#define REUT_MISC_ODT_CTRL	0x4d94 ++ + #define MCMNTS_SC_WDBWM		0x4f8c +  + /* MCDECS */ +diff --git a/src/southbridge/intel/lynxpoint/pch.h b/src/southbridge/intel/lynxpoint/pch.h +index 74b4d50017..16bef5032a 100644 +--- a/src/southbridge/intel/lynxpoint/pch.h ++++ b/src/southbridge/intel/lynxpoint/pch.h +@@ -586,6 
+586,8 @@ void mainboard_config_rcba(void); + #define ACPIIRQEN	0x31e0	/* 32bit */ + #define OIC		0x31fe	/* 16bit */ + #define PRSTS		0x3310	/* 32bit */ ++#define PM_CFG2		0x333c	/* 32bit */ ++#define  PM_CFG2_DRAM_RESET_CTL	(1 << 26)	/* ULT only */ + #define PMSYNC_CONFIG	0x33c4	/* 32bit */ + #define PMSYNC_CONFIG2	0x33cc	/* 32bit */ + #define SOFT_RESET_CTRL 0x38f4 +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch b/config/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch new file mode 100644 index 00000000..e4cea123 --- /dev/null +++ b/config/coreboot/haswell/patches/0017-haswell-NRI-Add-pre-training-steps.patch @@ -0,0 +1,384 @@ +From 42e43eb210bbb172af8e5ad064326c4570be8654 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sat, 7 May 2022 23:12:18 +0200 +Subject: [PATCH 17/26] haswell NRI: Add pre-training steps + +Implement pre-training steps, which consist of enabling ECC I/O and +filling the WDB (Write Data Buffer, stores test patterns) through a +magic LDAT port. 
+ +Change-Id: Ie2e09e3b218c4569ed8de5c5e1b05d491032e0f1 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/raminit_main.c     |  34 ++++ + .../haswell/native_raminit/raminit_native.h   |  24 +++ + .../haswell/native_raminit/reg_structs.h      |  45 +++++ + .../intel/haswell/native_raminit/setup_wdb.c  | 159 ++++++++++++++++++ + .../intel/haswell/registers/mchbar.h          |   9 + + 6 files changed, 272 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/setup_wdb.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index e9212df9e6..8d7d4e4db0 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -10,5 +10,6 @@ romstage-y += memory_map.c + romstage-y += raminit_main.c + romstage-y += raminit_native.c + romstage-y += reut.c ++romstage-y += setup_wdb.c + romstage-y += spd_bitmunching.c + romstage-y += timings_refresh.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 73ff180b8c..5e4674957d 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -13,6 +13,39 @@ +  + #include "raminit_native.h" +  ++static enum raminit_status pre_training(struct sysinfo *ctrl) ++{ ++	/* Skip on S3 resume */ ++	if (ctrl->bootmode == BOOTMODE_S3) ++		return RAMINIT_STATUS_SUCCESS; ++ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) { ++			if (!rank_in_ch(ctrl, slot + slot, channel)) ++				continue; ++ ++			printk(RAM_DEBUG, "C%uS%u:\n", channel, slot); ++			printk(RAM_DEBUG, "\tMR0: 0x%04x\n", ctrl->mr0[channel][slot]); ++			printk(RAM_DEBUG, "\tMR1: 
0x%04x\n", ctrl->mr1[channel][slot]); ++			printk(RAM_DEBUG, "\tMR2: 0x%04x\n", ctrl->mr2[channel][slot]); ++			printk(RAM_DEBUG, "\tMR3: 0x%04x\n", ctrl->mr3[channel][slot]); ++			printk(RAM_DEBUG, "\n"); ++		} ++		if (ctrl->is_ecc) { ++			union mad_dimm_reg mad_dimm = { ++				.raw = mchbar_read32(MAD_DIMM(channel)), ++			}; ++			/* Enable ECC I/O */ ++			mad_dimm.ecc_mode = 1; ++			mchbar_write32(MAD_DIMM(channel), mad_dimm.raw); ++			/* Wait 4 usec after enabling the ECC I/O, needed by HW */ ++			udelay(4); ++		} ++	} ++	setup_wdb(ctrl); ++	return RAMINIT_STATUS_SUCCESS; ++} ++ + struct task_entry { + 	enum raminit_status (*task)(struct sysinfo *); + 	bool is_enabled; +@@ -26,6 +59,7 @@ static const struct task_entry cold_boot[] = { + 	{ configure_mc,                                           true, "CONFMC",     }, + 	{ configure_memory_map,                                   true, "MEMMAP",     }, + 	{ do_jedec_init,                                          true, "JEDECINIT",  }, ++	{ pre_training,                                           true, "PRETRAIN",   }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index e3cf4254a0..f29c2ec366 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -35,6 +35,13 @@ +  + #define RTTNOM_MASK		(BIT(9) | BIT(6) | BIT(2)) +  ++#define BASIC_VA_PAT_SPREAD_8	0x01010101 ++ ++#define WDB_CACHE_LINE_SIZE	8 ++ ++#define NUM_WDB_CL_MUX_SEEDS	3 ++#define NUM_CADB_MUX_SEEDS	3 ++ + /* ZQ calibration types */ + enum { + 	ZQ_INIT,	/* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init  with tZQinit  */ +@@ -318,6 +325,23 @@ void reut_issue_mrs_all( +  + enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type); +  ++void write_wdb_fixed_pat( ++	const 
struct sysinfo *ctrl, ++	const uint8_t patterns[], ++	const uint8_t pat_mask[], ++	uint8_t spread, ++	uint16_t start); ++ ++void write_wdb_va_pat( ++	const struct sysinfo *ctrl, ++	uint32_t agg_mask, ++	uint32_t vic_mask, ++	uint8_t vic_rot, ++	uint16_t start); ++ ++void program_wdb_lfsr(const struct sysinfo *ctrl, bool cleanup); ++void setup_wdb(const struct sysinfo *ctrl); ++ + uint8_t get_rx_bias(const struct sysinfo *ctrl); +  + uint8_t get_tCWL(uint32_t mem_clock_mhz); +diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +index 9929f617fe..7aa8d8c8b2 100644 +--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h ++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +@@ -335,6 +335,18 @@ union mcscheds_cbit_reg { + 	uint32_t raw; + }; +  ++union reut_pat_cl_mux_lmn_reg { ++	struct __packed { ++		uint32_t l_data_select : 1; // Bits  0:0 ++		uint32_t en_sweep_freq : 1; // Bits  1:1 ++		uint32_t               : 6; // Bits  7:2 ++		uint32_t l_counter     : 8; // Bits 15:8 ++		uint32_t m_counter     : 8; // Bits 23:16 ++		uint32_t n_counter     : 8; // Bits 31:24 ++	}; ++	uint32_t raw; ++}; ++ + union reut_pat_cadb_prog_reg { + 	struct __packed { + 		uint32_t addr : 16; // Bits 15:0 +@@ -439,6 +451,39 @@ union reut_misc_odt_ctrl_reg { + 	uint32_t raw; + }; +  ++union ldat_pdat_reg { ++	struct __packed { ++		uint32_t fast_addr : 12; // Bits 11:0 ++		uint32_t           :  4; // Bits 15:12 ++		uint32_t addr_en   :  1; // Bits 16:16 ++		uint32_t seq_en    :  1; // Bits 17:17 ++		uint32_t pol_0     :  1; // Bits 18:18 ++		uint32_t pol_1     :  1; // Bits 19:19 ++		uint32_t cmd_a     :  4; // Bits 23:20 ++		uint32_t cmd_b     :  4; // Bits 27:24 ++		uint32_t cmd_c     :  4; // Bits 31:28 ++	}; ++	uint32_t raw; ++}; ++ ++union ldat_sdat_reg { ++	struct __packed { ++		uint32_t bank_sel   : 4; // Bits  3:0 ++		uint32_t            : 1; // Bits  4:4 ++		uint32_t array_sel  : 
5; // Bits  9:5 ++		uint32_t cmp        : 1; // Bits 10:10 ++		uint32_t replicate  : 1; // Bits 11:11 ++		uint32_t dword      : 4; // Bits 15:12 ++		uint32_t mode       : 2; // Bits 17:16 ++		uint32_t mpmap      : 6; // Bits 23:18 ++		uint32_t mpb_offset : 4; // Bits 27:24 ++		uint32_t stage_en   : 1; // Bits 28:28 ++		uint32_t shadow     : 2; // Bits 30:29 ++		uint32_t            : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ + union mcscheds_dft_misc_reg { + 	struct __packed { + 		uint32_t wdar                 :  1; // Bits  0:0 +diff --git a/src/northbridge/intel/haswell/native_raminit/setup_wdb.c b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c +new file mode 100644 +index 0000000000..ec37c48415 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c +@@ -0,0 +1,159 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <northbridge/intel/haswell/haswell.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++static void ldat_write_cacheline( ++	const struct sysinfo *const ctrl, ++	const uint8_t chunk, ++	const uint16_t start, ++	const uint64_t data) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		/* ++		 * Do not do a 64-bit write here. The register is not aligned ++		 * to a 64-bit boundary, which could potentially cause issues. ++		 */ ++		mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 0), data & UINT32_MAX); ++		mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 1), data >> 32); ++		/* ++		 * Set REPLICATE = 0 as you don't want to replicate the data. ++		 * Set BANK_SEL to the chunk you want to write the 64 bits to. ++		 * Set ARRAY_SEL = 0 (the MC WDB) and MODE = 1. 
++		 */ ++		const union ldat_sdat_reg ldat_sdat = { ++			.bank_sel = chunk, ++			.mode     = 1, ++		}; ++		mchbar_write32(QCLK_ch_LDAT_SDAT(channel), ldat_sdat.raw); ++		/* ++		 * Finally, write the PDAT register indicating which cacheline ++		 * of the WDB you want to write to by setting FAST_ADDR field ++		 * to one of the 64 cache lines. Also set CMD_B in the PDAT ++		 * register to 4'b1000, indicating that this is a LDAT write. ++		 */ ++		const union ldat_pdat_reg ldat_pdat = { ++			.fast_addr = MIN(start, 0xfff), ++			.cmd_b     = 8, ++		}; ++		mchbar_write32(QCLK_ch_LDAT_PDAT(channel), ldat_pdat.raw); ++	} ++} ++ ++static void clear_ldat_mode(const struct sysinfo *const ctrl) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) ++		mchbar_write32(QCLK_ch_LDAT_SDAT(channel), 0); ++} ++ ++void write_wdb_fixed_pat( ++	const struct sysinfo *const ctrl, ++	const uint8_t patterns[], ++	const uint8_t pat_mask[], ++	const uint8_t spread, ++	const uint16_t start) ++{ ++	for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) { ++		uint64_t data = 0; ++		for (uint8_t b = 0; b < 64; b++) { ++			const uint8_t beff  = b % spread; ++			const uint8_t burst = patterns[pat_mask[beff]]; ++			if (burst & BIT(chunk)) ++				data |= 1ULL << b; ++		} ++		ldat_write_cacheline(ctrl, chunk, start, data); ++	} ++	clear_ldat_mode(ctrl); ++} ++ ++static inline uint32_t rol_u32(const uint32_t val) ++{ ++	return (val << 1) | ((val >> 31) & 1); ++} ++ ++void write_wdb_va_pat( ++	const struct sysinfo *const ctrl, ++	const uint32_t agg_mask, ++	const uint32_t vic_mask, ++	const uint8_t vic_rot, ++	const uint16_t start) ++{ ++	static const uint8_t va_mask_to_compressed[4] = {0xaa, 0xc0, 0xcc, 0xf0}; ++	uint32_t v_mask = vic_mask; ++	uint32_t a_mask = agg_mask; ++	for (uint8_t v = 0; v < vic_rot; v++) { ++		uint8_t compressed[32] = {0}; ++		/* Iterate through all 32 bits and create a compressed version of cacheline */ ++		for (uint8_t b = 0; b < ARRAY_SIZE(compressed); 
b++) { ++			const uint8_t vic = !!(v_mask & BIT(b)); ++			const uint8_t agg = !!(a_mask & BIT(b)); ++			const uint8_t index = !vic << 1 | agg << 0; ++			compressed[b] = va_mask_to_compressed[index]; ++		} ++		for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) { ++			uint32_t data = 0; ++			for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++) ++				data |= !!(compressed[b] & BIT(chunk)) << b; ++ ++			const uint64_t data64 = (uint64_t)data << 32 | data; ++			ldat_write_cacheline(ctrl, chunk, start + v, data64); ++		} ++		v_mask = rol_u32(v_mask); ++		a_mask = rol_u32(a_mask); ++	} ++	clear_ldat_mode(ctrl); ++} ++ ++void program_wdb_lfsr(const struct sysinfo *ctrl, const bool cleanup) ++{ ++	/* Cleanup LFSR seeds are sequential */ ++	const uint32_t cleanup_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xaaaaaa, 0xcccccc, 0xf0f0f0 }; ++	const uint32_t regular_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xa10ca1, 0xef0d08, 0xad0a1e }; ++	const uint32_t *seeds = cleanup ? cleanup_seeds : regular_seeds; ++ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t i = 0; i < NUM_WDB_CL_MUX_SEEDS; i++) { ++			mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_RD_x(channel, i), seeds[i]); ++			mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_WR_x(channel, i), seeds[i]); ++		} ++	} ++} ++ ++void setup_wdb(const struct sysinfo *ctrl) ++{ ++	const uint32_t amask[9] = { ++		0x86186186, 0x18618618, 0x30c30c30, ++		0xa28a28a2, 0x8a28a28a, 0x14514514, ++		0x28a28a28, 0x92492492, 0x24924924, ++	}; ++	const uint32_t vmask = 0x41041041; ++ ++	/* Fill first 8 entries with simple 2-LFSR VA pattern */ ++	write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0); ++ ++	/* Fill next 54 entries with 3-LFSR VA pattern */ ++	for (uint8_t a = 0; a < ARRAY_SIZE(amask); a++) ++		write_wdb_va_pat(ctrl, amask[a], vmask, 6, 8 + a * 6); ++ ++	program_wdb_lfsr(ctrl, false); ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if 
(!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		const union reut_pat_cl_mux_lmn_reg wdb_cl_mux_lmn = { ++			.en_sweep_freq = 1, ++			.l_counter     = 1, ++			.m_counter     = 1, ++			.n_counter     = 10, ++		}; ++		mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_LMN(channel), wdb_cl_mux_lmn.raw); ++	} ++} +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 4fc78a7f43..f8408e51a0 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -94,6 +94,11 @@ + #define TC_BANK_RANK_D_ch(ch)			_MCMAIN_C(0x4014, ch) + #define SC_ROUNDT_LAT_ch(ch)			_MCMAIN_C(0x4024, ch) +  ++#define REUT_ch_PAT_WDB_CL_MUX_WR_x(ch, x)	_MCMAIN_C_X(0x4048, ch, x) /* x in 0 .. 2 */ ++#define REUT_ch_PAT_WDB_CL_MUX_RD_x(ch, x)	_MCMAIN_C_X(0x4054, ch, x) /* x in 0 .. 2 */ ++ ++#define REUT_ch_PAT_WDB_CL_MUX_LMN(ch)		_MCMAIN_C(0x4078, ch) ++ + #define SC_WR_ADD_DELAY_ch(ch)			_MCMAIN_C(0x40d0, ch) +  + #define REUT_ch_MISC_CKE_CTRL(ch)		_MCMAIN_C(0x4190, ch) +@@ -110,6 +115,10 @@ + #define MC_INIT_STATE_ch(ch)			_MCMAIN_C(0x42a0, ch) + #define TC_SRFTP_ch(ch)				_MCMAIN_C(0x42a4, ch) +  ++#define QCLK_ch_LDAT_PDAT(ch)			_MCMAIN_C(0x42d0, ch) ++#define QCLK_ch_LDAT_SDAT(ch)			_MCMAIN_C(0x42d4, ch) ++#define QCLK_ch_LDAT_DATA_IN_x(ch, x)		_MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 
1 */ ++ + #define REUT_GLOBAL_ERR				0x4804 +  + #define REUT_ch_SEQ_CFG(ch)			(0x48a8 + 8 * (ch)) +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch b/config/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch new file mode 100644 index 00000000..5df22ed3 --- /dev/null +++ b/config/coreboot/haswell/patches/0018-haswell-NRI-Add-REUT-I-O-test-library.patch @@ -0,0 +1,1128 @@ +From f4dd460d609276de7cb7db91f145a404451a2301 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sun, 8 May 2022 00:11:29 +0200 +Subject: [PATCH 18/26] haswell NRI: Add REUT I/O test library + +Implement a library to run I/O tests using the REUT hardware. + +Change-Id: Id7b207cd0a3989ddd23c88c6b1f0cfa79d2c861f +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/raminit_native.h   | 110 +++ + .../haswell/native_raminit/reg_structs.h      | 121 +++ + .../intel/haswell/native_raminit/testing_io.c | 742 ++++++++++++++++++ + .../intel/haswell/registers/mchbar.h          |  30 + + 5 files changed, 1004 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/testing_io.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index 8d7d4e4db0..6e1b365602 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -12,4 +12,5 @@ romstage-y += raminit_native.c + romstage-y += reut.c + romstage-y += setup_wdb.c + romstage-y += spd_bitmunching.c ++romstage-y += testing_io.c + romstage-y += timings_refresh.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index f29c2ec366..56df36ca8d 100644 +--- 
a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -58,6 +58,88 @@ enum { + 	REUT_MODE_NOP  = 3, /* Normal operation mode */ + }; +  ++/* REUT error counter control */ ++enum { ++	COUNT_ERRORS_PER_CHANNEL	= 0, ++	COUNT_ERRORS_PER_LANE		= 1, ++	COUNT_ERRORS_PER_BYTE_GROUP	= 2, ++	COUNT_ERRORS_PER_CHUNK		= 3, ++}; ++ ++enum wdb_dq_pattern { ++	BASIC_VA = 0, ++	SEGMENT_WDB, ++	CADB, ++	TURN_AROUND, ++	LMN_VA, ++	TURN_AROUND_WR, ++	TURN_AROUND_ODT, ++	RD_RD_TA, ++	RD_RD_TA_ALL, ++}; ++ ++enum reut_cmd_pat { ++	PAT_WR_RD, ++	PAT_WR, ++	PAT_RD, ++	PAT_RD_WR_TA, ++	PAT_WR_RD_TA, ++	PAT_ODT_TA, ++}; ++ ++/* REUT subsequence types (B = Base, O = Offset) */ ++enum { ++	SUBSEQ_B_RD	= 0 << 22, ++	SUBSEQ_B_WR	= 1 << 22, ++	SUBSEQ_B_RD_WR	= 2 << 22, ++	SUBSEQ_B_WR_RD	= 3 << 22, ++	SUBSEQ_O_RD	= 4 << 22, ++	SUBSEQ_O_WR	= 5 << 22, ++}; ++ ++/* REUT mux control */ ++enum { ++	REUT_MUX_LMN      = 0, ++	REUT_MUX_BTBUFFER = 1, ++	REUT_MUX_LFSR     = 2, ++}; ++ ++/* Increment scale */ ++enum { ++	SCALE_LOGARITHM	= 0, ++	SCALE_LINEAR	= 1, ++}; ++ ++enum test_stop { ++	NSOE   = 0,	/* Never stop on error */ ++	NTHSOE = 1,	/* Stop on the nth error (we use n = 1) */ ++	ABGSOE = 2,	/* Stop on all byte groups error */ ++	ALSOE  = 3,	/* Stop on all lanes error */ ++}; ++ ++struct wdb_pat { ++	uint32_t start_ptr;	/* Starting pointer in WDB */ ++	uint32_t stop_ptr;	/* Stopping pointer in WDB */ ++	uint16_t inc_rate;	/* How quickly the WDB walks through cachelines */ ++	uint8_t  dq_pattern;	/* DQ pattern to use (see enum wdb_dq_pattern above) */ ++}; ++ ++struct reut_pole { ++	uint16_t start; ++	uint16_t stop; ++	uint16_t order; ++	uint32_t inc_rate; ++	uint16_t inc_val; ++	bool wrap_trigger; ++}; ++ ++struct reut_box { ++	struct reut_pole rank; ++	struct reut_pole bank; ++	struct reut_pole row; ++	struct reut_pole col; ++}; ++ + enum command_training_iteration { + 	CT_ITERATION_CLOCK = 0, + 	
CT_ITERATION_CMD_NORTH, +@@ -199,6 +281,10 @@ struct sysinfo { + 	uint16_t mr1[NUM_CHANNELS][NUM_SLOTRANKS]; + 	uint16_t mr2[NUM_CHANNELS][NUM_SLOTRANKS]; + 	uint16_t mr3[NUM_CHANNELS][NUM_SLOTRANKS]; ++ ++	uint8_t dq_pat; ++ ++	uint8_t dq_pat_lc; + }; +  + static inline bool is_hsw_ult(void) +@@ -342,6 +428,30 @@ void write_wdb_va_pat( + void program_wdb_lfsr(const struct sysinfo *ctrl, bool cleanup); + void setup_wdb(const struct sysinfo *ctrl); +  ++void program_seq_addr(uint8_t channel, const struct reut_box *reut_addr, bool log_seq_addr); ++void program_loop_count(const struct sysinfo *ctrl, uint8_t channel, uint8_t lc_exp); ++ ++void setup_io_test( ++	struct sysinfo *ctrl, ++	uint8_t chanmask, ++	enum reut_cmd_pat cmd_pat, ++	uint16_t num_cl, ++	uint8_t lc, ++	const struct reut_box *reut_addr, ++	enum test_stop soe, ++	const struct wdb_pat *pat, ++	uint8_t en_cadb, ++	uint8_t subseq_wait); ++ ++void setup_io_test_cadb(struct sysinfo *ctrl, uint8_t chanmask, uint8_t lc, enum test_stop soe); ++void setup_io_test_basic_va(struct sysinfo *ctrl, uint8_t chm, uint8_t lc, enum test_stop soe); ++void setup_io_test_mpr(struct sysinfo *ctrl, uint8_t chanmask, uint8_t lc, enum test_stop soe); ++ ++uint8_t select_reut_ranks(struct sysinfo *ctrl, uint8_t channel, uint8_t rankmask); ++ ++void run_mpr_io_test(bool clear_errors); ++uint8_t run_io_test(struct sysinfo *ctrl, uint8_t chanmask, uint8_t dq_pat, bool clear_errors); ++ + uint8_t get_rx_bias(const struct sysinfo *ctrl); +  + uint8_t get_tCWL(uint32_t mem_clock_mhz); +diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +index 7aa8d8c8b2..b943259b91 100644 +--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h ++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +@@ -347,6 +347,54 @@ union reut_pat_cl_mux_lmn_reg { + 	uint32_t raw; + }; +  ++union reut_err_ctl_reg { ++	struct __packed { ++		uint32_t 
stop_on_nth_error              : 6; // Bits  5:0 ++		uint32_t                                : 6; // Bits 11:6 ++		uint32_t stop_on_error_control          : 2; // Bits 13:12 ++		uint32_t                                : 2; // Bits 15:14 ++		uint32_t selective_err_enable_chunk     : 8; // Bits 23:16 ++		uint32_t selective_err_enable_cacheline : 8; // Bits 31:24 ++	}; ++	uint32_t raw; ++}; ++ ++union reut_pat_cadb_mux_ctrl_reg { ++	struct __packed { ++		uint32_t mux_0_ctrl     : 2; // Bits  1:0 ++		uint32_t                : 2; // Bits  3:2 ++		uint32_t mux_1_ctrl     : 2; // Bits  5:4 ++		uint32_t                : 2; // Bits  7:6 ++		uint32_t mux_2_ctrl     : 2; // Bits  9:8 ++		uint32_t                : 6; // Bits 15:10 ++		uint32_t sel_mux_0_ctrl : 2; // Bits 17:16 ++		uint32_t                : 2; // Bits 19:18 ++		uint32_t sel_mux_1_ctrl : 2; // Bits 21:20 ++		uint32_t                : 2; // Bits 23:22 ++		uint32_t sel_mux_2_ctrl : 2; // Bits 25:24 ++		uint32_t                : 6; // Bits 31:26 ++	}; ++	uint32_t raw; ++}; ++ ++union reut_pat_wdb_cl_mux_cfg_reg { ++	struct __packed { ++		uint32_t mux_0_control         : 2; // Bits  1:0 ++		uint32_t                       : 1; // Bits  2:2 ++		uint32_t mux_1_control         : 2; // Bits  4:3 ++		uint32_t                       : 1; // Bits  5:5 ++		uint32_t mux_2_control         : 2; // Bits  7:6 ++		uint32_t                       : 6; // Bits 13:8 ++		uint32_t ecc_replace_byte_ctl  : 1; // Bits 14:14 ++		uint32_t ecc_data_source_sel   : 1; // Bits 15:15 ++		uint32_t save_lfsr_seed_rate   : 6; // Bits 21:16 ++		uint32_t                       : 2; // Bits 23:22 ++		uint32_t reload_lfsr_seed_rate : 3; // Bits 26:24 ++		uint32_t                       : 5; // Bits 31:27 ++	}; ++	uint32_t raw; ++}; ++ + union reut_pat_cadb_prog_reg { + 	struct __packed { + 		uint32_t addr : 16; // Bits 15:0 +@@ -366,6 +414,19 @@ union reut_pat_cadb_prog_reg { + 	uint32_t raw32[2]; + }; +  ++union reut_pat_wdb_cl_ctrl_reg { ++	struct 
__packed { ++		uint32_t inc_rate  :  5; // Bits 4:0 ++		uint32_t inc_scale :  1; // Bits 5:5 ++		uint32_t           :  2; // Bits 7:6 ++		uint32_t start_ptr :  6; // Bits 13:8 ++		uint32_t           :  2; // Bits 15:14 ++		uint32_t end_ptr   :  6; // Bits 21:16 ++		uint32_t           : 10; // Bits 31:22 ++	}; ++	uint32_t raw; ++}; ++ + union reut_pat_cadb_mrs_reg { + 	struct __packed { + 		uint32_t delay_gap : 3; // Bits  2:0 +@@ -406,6 +467,66 @@ union reut_seq_cfg_reg { + 	uint32_t raw32[2]; + }; +  ++union reut_seq_base_addr_reg { ++	struct __packed { ++		uint32_t           :  3; // Bits  2:0 ++		uint32_t col_addr  :  8; // Bits 10:3 ++		uint32_t           : 13; // Bits 23:11 ++		uint32_t row_addr  : 16; // Bits 39:24 ++		uint32_t           :  8; // Bits 47:40 ++		uint32_t bank_addr :  3; // Bits 50:48 ++		uint32_t           :  5; // Bits 55:51 ++		uint32_t rank_addr :  3; // Bits 58:56 ++		uint32_t           :  5; // Bits 63:59 ++	}; ++	uint32_t raw32[2]; ++	uint64_t raw; ++}; ++ ++union reut_seq_misc_ctl_reg { ++	struct __packed { ++		uint32_t col_addr_order       : 2; // Bits  1:0 ++		uint32_t row_addr_order       : 2; // Bits  3:2 ++		uint32_t bank_addr_order      : 2; // Bits  5:4 ++		uint32_t rank_addr_order      : 2; // Bits  7:6 ++		uint32_t                      : 5; // Bits 12:8 ++		uint32_t addr_invert_rate     : 3; // Bits 15:13 ++		uint32_t                      : 4; // Bits 19:16 ++		uint32_t col_addr_invert_en   : 1; // Bits 20:20 ++		uint32_t row_addr_invert_en   : 1; // Bits 21:21 ++		uint32_t bank_addr_invert_en  : 1; // Bits 22:22 ++		uint32_t rank_addr_invert_en  : 1; // Bits 23:23 ++		uint32_t col_wrap_trigger_en  : 1; // Bits 24:24 ++		uint32_t row_wrap_trigger_en  : 1; // Bits 25:25 ++		uint32_t bank_wrap_trigger_en : 1; // Bits 26:26 ++		uint32_t rank_wrap_trigger_en : 1; // Bits 27:27 ++		uint32_t col_wrap_carry_en    : 1; // Bits 28:28 ++		uint32_t row_wrap_carry_en    : 1; // Bits 29:29 ++		uint32_t bank_wrap_carry_en   : 1; // Bits 
30:30 ++		uint32_t rank_wrap_carry_en   : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ ++union reut_seq_addr_inc_ctl_reg { ++	struct __packed { ++		uint32_t                     :  3; // Bits  2:0 ++		uint32_t col_addr_increment  :  8; // Bits 10:3 ++		uint32_t                     :  1; // Bits 11:11 ++		uint32_t col_addr_update     :  8; // Bits 19:12 ++		uint32_t row_addr_increment  : 12; // Bits 31:20 ++		uint32_t row_addr_update     :  6; // Bits 37:32 ++		uint32_t bank_addr_increment :  3; // Bits 40:38 ++		uint32_t                     :  3; // Bits 43:41 ++		uint32_t bank_addr_update    :  8; // Bits 51:44 ++		uint32_t rank_addr_increment :  3; // Bits 54:52 ++		uint32_t                     :  1; // Bits 55:55 ++		uint32_t rank_addr_update    :  8; // Bits 63:56 ++	}; ++	uint64_t raw; ++	uint32_t raw32[2]; ++}; ++ + union reut_seq_ctl_reg { + 	struct __packed { + 		uint32_t start_test    :  1; // Bits  0:0 +diff --git a/src/northbridge/intel/haswell/native_raminit/testing_io.c b/src/northbridge/intel/haswell/native_raminit/testing_io.c +new file mode 100644 +index 0000000000..7716fc4285 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/testing_io.c +@@ -0,0 +1,742 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <console/console.h> ++#include <delay.h> ++#include <lib.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <timer.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++static void set_cadb_patterns(const uint8_t channel, const uint16_t seeds[NUM_CADB_MUX_SEEDS]) ++{ ++	for (uint8_t i = 0; i < NUM_CADB_MUX_SEEDS; i++) ++		mchbar_write32(REUT_ch_PAT_CADB_MUX_x(channel, i), seeds[i]); ++} ++ ++static void setup_cadb( ++	struct sysinfo *ctrl, ++	const uint8_t channel, ++	const uint8_t vic_spread, ++	const uint8_t vic_bit) ++{ ++	const bool lmn_en = false; ++ ++	/* ++	 * Currently, always start writing at CADB row 0. ++	 * Could add a start point parameter in the future. 
++	 */ ++	mchbar_write8(REUT_ch_PAT_CADB_WRITE_PTR(channel), 0); ++	const uint8_t num_cadb_rows = 8; ++	for (uint8_t row = 0; row < num_cadb_rows; row++) { ++		const uint8_t lfsr0 = (row >> 0) & 1; ++		const uint8_t lfsr1 = (row >> 1) & 1; ++		uint64_t reg64 = 0; ++		for (uint8_t bit = 0; bit < 22; bit++) { ++			uint8_t bremap; ++			if (bit >= 19) { ++				/* (bremap in 40 .. 42) => CADB data control */ ++				bremap = bit + 21; ++			} else if (bit >= 16) { ++				/* (bremap in 24 .. 26) => CADB data bank */ ++				bremap = bit + 8; ++			} else { ++				/* (bremap in  0 .. 15) => CADB data address */ ++				bremap = bit; ++			} ++			const uint8_t fine = bit % vic_spread; ++			reg64 |= ((uint64_t)(fine == vic_bit ? lfsr0 : lfsr1)) << bremap; ++		} ++		/* ++		 * Write row. CADB pointer is auto incremented after every write. This must be ++		 * a single 64-bit write, otherwise the CADB pointer will auto-increment twice. ++		 */ ++		mchbar_write64(REUT_ch_PAT_CADB_PROG(channel), reg64); ++	} ++	const union reut_pat_cadb_mux_ctrl_reg cadb_mux_ctrl = { ++		.mux_0_ctrl = lmn_en ? REUT_MUX_LMN : REUT_MUX_LFSR, ++		.mux_1_ctrl = REUT_MUX_LFSR, ++		.mux_2_ctrl = REUT_MUX_LFSR, ++	}; ++	mchbar_write32(REUT_ch_PAT_CADB_MUX_CTRL(channel), cadb_mux_ctrl.raw); ++	const union reut_pat_cl_mux_lmn_reg cadb_cl_mux_lmn = { ++		.en_sweep_freq = 1, ++		.l_counter     = 1, ++		.m_counter     = 1, ++		.n_counter     = 6, ++	}; ++	mchbar_write32(REUT_ch_PAT_CADB_CL_MUX_LMN(channel), cadb_cl_mux_lmn.raw); ++	const uint16_t cadb_mux_seeds[NUM_CADB_MUX_SEEDS] = { 0x0ea1, 0xbeef, 0xdead }; ++	set_cadb_patterns(channel, cadb_mux_seeds); ++} ++ ++static uint32_t calc_rate(const uint32_t rate, const uint32_t lim, const uint8_t scale_bit) ++{ ++	return rate > lim ? log2_ceil(rate - 1) : BIT(scale_bit) | rate; ++} ++ ++void program_seq_addr( ++	const uint8_t channel, ++	const struct reut_box *reut_addr, ++	const bool log_seq_addr) ++{ ++	const int loglevel = log_seq_addr ? 
BIOS_ERR : BIOS_NEVER; ++	const uint32_t div = 8; ++	union reut_seq_base_addr_reg reut_seq_addr_start = { ++		.col_addr  = reut_addr->col.start / div, ++		.row_addr  = reut_addr->row.start, ++		.bank_addr = reut_addr->bank.start, ++		.rank_addr = reut_addr->rank.start, ++	}; ++	mchbar_write64(REUT_ch_SEQ_ADDR_START(channel), reut_seq_addr_start.raw); ++	reut_seq_addr_start.raw = mchbar_read64(REUT_ch_SEQ_ADDR_START(channel)); ++	printk(loglevel, "\tStart column:     %u\n", reut_seq_addr_start.col_addr); ++	printk(loglevel, "\tStart row:        %u\n", reut_seq_addr_start.row_addr); ++	printk(loglevel, "\tStart bank:       %u\n", reut_seq_addr_start.bank_addr); ++	printk(loglevel, "\tStart rank:       %u\n", reut_seq_addr_start.rank_addr); ++	printk(loglevel, "\n"); ++ ++	union reut_seq_base_addr_reg reut_seq_addr_stop = { ++		.col_addr  = reut_addr->col.stop / div, ++		.row_addr  = reut_addr->row.stop, ++		.bank_addr = reut_addr->bank.stop, ++		.rank_addr = reut_addr->rank.stop, ++	}; ++	mchbar_write64(REUT_ch_SEQ_ADDR_WRAP(channel), reut_seq_addr_stop.raw); ++	reut_seq_addr_stop.raw = mchbar_read64(REUT_ch_SEQ_ADDR_WRAP(channel)); ++	printk(loglevel, "\tStop column:      %u\n", reut_seq_addr_stop.col_addr); ++	printk(loglevel, "\tStop row:         %u\n", reut_seq_addr_stop.row_addr); ++	printk(loglevel, "\tStop bank:        %u\n", reut_seq_addr_stop.bank_addr); ++	printk(loglevel, "\tStop rank:        %u\n", reut_seq_addr_stop.rank_addr); ++	printk(loglevel, "\n"); ++ ++	union reut_seq_misc_ctl_reg reut_seq_misc_ctl = { ++		.col_wrap_trigger_en  = reut_addr->col.wrap_trigger, ++		.row_wrap_trigger_en  = reut_addr->row.wrap_trigger, ++		.bank_wrap_trigger_en = reut_addr->bank.wrap_trigger, ++		.rank_wrap_trigger_en = reut_addr->rank.wrap_trigger, ++	}; ++	mchbar_write32(REUT_ch_SEQ_MISC_CTL(channel), reut_seq_misc_ctl.raw); ++	printk(loglevel, "\tWrap column:      %u\n", reut_addr->col.wrap_trigger); ++	printk(loglevel, "\tWrap row:         %u\n", 
reut_addr->row.wrap_trigger); ++	printk(loglevel, "\tWrap bank:        %u\n", reut_addr->bank.wrap_trigger); ++	printk(loglevel, "\tWrap rank:        %u\n", reut_addr->rank.wrap_trigger); ++	printk(loglevel, "\n"); ++ ++	union reut_seq_addr_inc_ctl_reg reut_seq_addr_inc_ctl = { ++		.col_addr_update  = calc_rate(reut_addr->col.inc_rate,  31, 7), ++		.row_addr_update  = calc_rate(reut_addr->row.inc_rate,  15, 5), ++		.bank_addr_update = calc_rate(reut_addr->bank.inc_rate, 31, 7), ++		.rank_addr_update = calc_rate(reut_addr->rank.inc_rate, 31, 7), ++		.col_addr_increment  = reut_addr->col.inc_val, ++		.row_addr_increment  = reut_addr->row.inc_val, ++		.bank_addr_increment = reut_addr->bank.inc_val, ++		.rank_addr_increment = reut_addr->rank.inc_val, ++	}; ++	printk(loglevel, "\tUpdRate column:   %u\n", reut_addr->col.inc_rate); ++	printk(loglevel, "\tUpdRate row:      %u\n", reut_addr->row.inc_rate); ++	printk(loglevel, "\tUpdRate bank:     %u\n", reut_addr->bank.inc_rate); ++	printk(loglevel, "\tUpdRate rank:     %u\n", reut_addr->rank.inc_rate); ++	printk(loglevel, "\n"); ++	printk(loglevel, "\tUpdRateCR column: %u\n", reut_seq_addr_inc_ctl.col_addr_update); ++	printk(loglevel, "\tUpdRateCR row:    %u\n", reut_seq_addr_inc_ctl.row_addr_update); ++	printk(loglevel, "\tUpdRateCR bank:   %u\n", reut_seq_addr_inc_ctl.bank_addr_update); ++	printk(loglevel, "\tUpdRateCR rank:   %u\n", reut_seq_addr_inc_ctl.rank_addr_update); ++	printk(loglevel, "\n"); ++	printk(loglevel, "\tUpdInc column:    %u\n", reut_seq_addr_inc_ctl.col_addr_increment); ++	printk(loglevel, "\tUpdInc row:       %u\n", reut_seq_addr_inc_ctl.row_addr_increment); ++	printk(loglevel, "\tUpdInc bank:      %u\n", reut_seq_addr_inc_ctl.bank_addr_increment); ++	printk(loglevel, "\tUpdInc rank:      %u\n", reut_seq_addr_inc_ctl.rank_addr_increment); ++	printk(loglevel, "\n"); ++	mchbar_write64(REUT_ch_SEQ_ADDR_INC_CTL(channel), reut_seq_addr_inc_ctl.raw); ++} ++ ++/* ++ * Early steppings take exponential (base 
2) loopcount values, ++ * but later steppings take linear loopcount values elsewhere. ++ * Address the differences in register offset and format here. ++ */ ++void program_loop_count(const struct sysinfo *ctrl, const uint8_t channel, const uint8_t lc_exp) ++{ ++	if (ctrl->stepping >= STEPPING_C0) { ++		const uint32_t loopcount = lc_exp >= 32 ? 0 : BIT(lc_exp); ++		mchbar_write32(HSW_REUT_ch_SEQ_LOOP_COUNT(channel), loopcount); ++	} else { ++		const uint8_t loopcount = lc_exp >= 32 ? 0 : lc_exp + 1; ++		union reut_seq_cfg_reg reut_seq_cfg = { ++			.raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)), ++		}; ++		reut_seq_cfg.early_steppings_loop_count = loopcount; ++		mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw); ++	} ++} ++ ++static inline void write_subseq(const uint8_t channel, const uint8_t idx, const uint32_t ssq) ++{ ++	mchbar_write32(REUT_ch_SUBSEQ_x_CTL(channel, idx), ssq); ++} ++ ++static void program_subseq( ++	struct sysinfo *const ctrl, ++	const uint8_t channel, ++	const enum reut_cmd_pat cmd_pat, ++	const uint32_t ss_a, ++	const uint32_t ss_b) ++{ ++	switch (cmd_pat) { ++	case PAT_WR_RD_TA: ++		write_subseq(channel, 0, ss_a | SUBSEQ_B_WR); ++		for (uint8_t i = 1; i < 7; i++) ++			write_subseq(channel, i, ss_b | SUBSEQ_B_RD_WR); ++ ++		write_subseq(channel, 7, ss_a | SUBSEQ_B_RD); ++		break; ++	case PAT_RD_WR_TA: ++		write_subseq(channel, 0, ss_b | SUBSEQ_B_WR_RD); ++		break; ++	case PAT_ODT_TA: ++		write_subseq(channel, 0, ss_a | SUBSEQ_B_WR); ++		write_subseq(channel, 1, ss_b | SUBSEQ_B_RD_WR); ++		write_subseq(channel, 2, ss_a | SUBSEQ_B_RD); ++		write_subseq(channel, 3, ss_b | SUBSEQ_B_WR_RD); ++		break; ++	default: ++		write_subseq(channel, 0, ss_a | SUBSEQ_B_WR); ++		write_subseq(channel, 1, ss_a | SUBSEQ_B_RD); ++		break; ++	} ++} ++ ++void setup_io_test( ++	struct sysinfo *ctrl, ++	const uint8_t chanmask, ++	const enum reut_cmd_pat cmd_pat, ++	const uint16_t num_cl, ++	const uint8_t lc, ++	const struct reut_box *const reut_addr, ++	const 
enum test_stop soe, ++	const struct wdb_pat *const pat, ++	const uint8_t en_cadb, ++	const uint8_t subseq_wait) ++{ ++	if (!chanmask) ++		die("\n%s: invalid chanmask\n", __func__); ++ ++	/* ++	 * Prepare variables needed for both channels. ++	 * Check for the cases where this MUST be 1: when ++	 * we manually walk through subseq ODT and TA Wr. ++	 */ ++	uint8_t lc_exp = MAX(lc - log2_ceil(num_cl), 0); ++	if (cmd_pat == PAT_WR_RD_TA || cmd_pat == PAT_ODT_TA) ++		lc_exp = 0; ++ ++	uint8_t num_clcr; ++	if (num_cl > 127) { ++		/* Assume exponential number */ ++		num_clcr = log2_ceil(num_cl); ++	} else { ++		/* Set number of cache lines as linear number */ ++		num_clcr = num_cl | BIT(7); ++	} ++ ++	const uint16_t num_cl2 = 2 * num_cl; ++	uint8_t num_cl2cr; ++	if (num_cl2 > 127) { ++		/* Assume exponential number */ ++		num_cl2cr = log2_ceil(num_cl2); ++	} else { ++		/* Set number of cache lines as linear number */ ++		num_cl2cr = num_cl2 | BIT(7); ++	} ++ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!(chanmask & BIT(channel))) { ++			union reut_seq_cfg_reg reut_seq_cfg = { ++				.raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)), ++			}; ++			reut_seq_cfg.global_control = 0; ++			mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw); ++			continue; ++		} ++ ++		/* ++		 * Program CADB ++		 */ ++		mchbar_write8(REUT_ch_MISC_PAT_CADB_CTRL(channel), !!en_cadb); ++		if (en_cadb) ++			setup_cadb(ctrl, channel, 7, 8); ++ ++		/* ++		 * Program sequence ++		 */ ++		uint8_t subseq_start = 0; ++		uint8_t subseq_end   = 0; ++		switch (cmd_pat) { ++		case PAT_WR_RD: ++			subseq_end = 1; ++			break; ++		case PAT_WR: ++			break; ++		case PAT_RD: ++			subseq_start = 1; ++			subseq_end   = 1; ++			break; ++		case PAT_RD_WR_TA: ++			break; ++		case PAT_WR_RD_TA: ++			subseq_end = 7; ++			break; ++		case PAT_ODT_TA: ++			subseq_end = 3; ++			break; ++		default: ++			die("\n%s: Pattern type %u is invalid\n", __func__, cmd_pat); ++		} ++		const union 
reut_seq_cfg_reg reut_seq_cfg = { ++			.global_control            = 1, ++			.initialization_mode       = REUT_MODE_TEST, ++			.subsequence_start_pointer = subseq_start, ++			.subsequence_end_pointer   = subseq_end, ++			.start_test_delay          = 2, ++		}; ++		mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw); ++		program_loop_count(ctrl, channel, lc_exp); ++		mchbar_write32(REUT_ch_SEQ_CTL(channel), (union reut_seq_ctl_reg) { ++			.clear_errors = 1, ++		}.raw); ++ ++		/* ++		 * Program subsequences ++		 */ ++		uint32_t subseq_a = 0; ++ ++		/* Number of cachelines and scale */ ++		subseq_a |= (num_clcr    & 0x00ff) << 0; ++		subseq_a |= (subseq_wait & 0x3fff) << 8; ++ ++		/* Reset current base address to start */ ++		subseq_a |= BIT(27); ++ ++		uint32_t subseq_b = 0; ++ ++		/* Number of cachelines and scale */ ++		subseq_b |= (num_cl2cr   & 0x00ff) << 0; ++		subseq_b |= (subseq_wait & 0x3fff) << 8; ++ ++		/* Reset current base address to start */ ++		subseq_b |= BIT(27); ++ ++		program_subseq(ctrl, channel, cmd_pat, subseq_a, subseq_b); ++ ++		/* Program sequence address */ ++		program_seq_addr(channel, reut_addr, false); ++ ++		/* Program WDB */ ++		const bool is_linear = pat->inc_rate < 32; ++		mchbar_write32(REUT_ch_WDB_CL_CTRL(channel), (union reut_pat_wdb_cl_ctrl_reg) { ++			.start_ptr = pat->start_ptr, ++			.end_ptr   = pat->stop_ptr, ++			.inc_rate  = is_linear ? pat->inc_rate : log2_ceil(pat->inc_rate), ++			.inc_scale = is_linear, ++		}.raw); ++ ++		/* Enable LMN in LMN or CADB modes, used to create lots of supply noise */ ++		const bool use_lmn = pat->dq_pattern == LMN_VA || pat->dq_pattern == CADB; ++		union reut_pat_wdb_cl_mux_cfg_reg pat_wdb_cl_mux_cfg = { ++			.mux_0_control = use_lmn ? 
REUT_MUX_LMN : REUT_MUX_LFSR, ++			.mux_1_control = REUT_MUX_LFSR, ++			.mux_2_control = REUT_MUX_LFSR, ++			.ecc_data_source_sel = 1, ++		}; ++ ++		/* Program LFSR save/restore, too complex unless everything is power of 2 */ ++		if (cmd_pat == PAT_ODT_TA || cmd_pat == PAT_WR_RD_TA) { ++			pat_wdb_cl_mux_cfg.reload_lfsr_seed_rate = log2_ceil(num_cl) + 1; ++			pat_wdb_cl_mux_cfg.save_lfsr_seed_rate   = 1; ++		} ++		mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_CFG(channel), pat_wdb_cl_mux_cfg.raw); ++ ++		/* Inversion mask is not used */ ++		mchbar_write32(REUT_ch_PAT_WDB_INV(channel), 0); ++ ++		/* Program error checking */ ++		const union reut_err_ctl_reg reut_err_ctl = { ++			.selective_err_enable_cacheline = 0xff, ++			.selective_err_enable_chunk     = 0xff, ++			.stop_on_error_control          = soe, ++			.stop_on_nth_error              = 1, ++		}; ++		mchbar_write32(REUT_ch_ERR_CONTROL(channel), reut_err_ctl.raw); ++		mchbar_write64(REUT_ch_ERR_DATA_MASK(channel), 0); ++		mchbar_write8(REUT_ch_ERR_ECC_MASK(channel), 0); ++	} ++ ++	/* Always do a ZQ short before the beginning of a test */ ++	reut_issue_zq(ctrl, chanmask, ZQ_SHORT); ++} ++ ++void setup_io_test_cadb( ++	struct sysinfo *ctrl, ++	const uint8_t chanmask, ++	const uint8_t lc, ++	const enum test_stop soe) ++{ ++	const struct reut_box reut_addr = { ++		.rank = { ++			.start    = 0, ++			.stop     = 0, ++			.inc_rate = 32, ++			.inc_val  = 1, ++		}, ++		.bank = { ++			.start    = 0, ++			.stop     = 7, ++			.inc_rate = 3, ++			.inc_val  = 1, ++		}, ++		.row = { ++			.start    = 0, ++			.stop     = 2047, ++			.inc_rate = 3, ++			.inc_val  = 73, ++		}, ++		.col = { ++			.start    = 0, ++			.stop     = 1023, ++			.inc_rate = 0, ++			.inc_val  = 53, ++		}, ++	}; ++	const struct wdb_pat pattern = { ++		.start_ptr  = 0, ++		.stop_ptr   = 9, ++		.inc_rate   = 4, ++		.dq_pattern = CADB, ++	}; ++	setup_io_test( ++		ctrl, ++		chanmask, ++		PAT_WR_RD, ++		128, ++		lc, ++		&reut_addr, ++		soe, ++		&pattern, ++		1, ++		0); 
++ ++	ctrl->dq_pat_lc = MAX(lc - 2 - 3, 0) + 1; ++	ctrl->dq_pat = CADB; ++} ++ ++void setup_io_test_basic_va( ++	struct sysinfo *ctrl, ++	const uint8_t chanmask, ++	const uint8_t lc, ++	const enum test_stop soe) ++{ ++	const uint32_t spread = 8; ++	const struct reut_box reut_addr = { ++		.rank = { ++			.start    = 0, ++			.stop     = 0, ++			.inc_rate = 32, ++			.inc_val  = 1, ++		}, ++		.col = { ++			.start    = 0, ++			.stop     = 1023, ++			.inc_rate = 0, ++			.inc_val  = 1, ++		}, ++	}; ++	const struct wdb_pat pattern = { ++		.start_ptr  = 0, ++		.stop_ptr   = spread - 1, ++		.inc_rate   = 4, ++		.dq_pattern = BASIC_VA, ++	}; ++	setup_io_test( ++		ctrl, ++		chanmask, ++		PAT_WR_RD, ++		128, ++		lc, ++		&reut_addr, ++		soe, ++		&pattern, ++		0, ++		0); ++ ++	ctrl->dq_pat_lc = MAX(lc - 8, 0) + 1; ++	ctrl->dq_pat = BASIC_VA; ++} ++ ++void setup_io_test_mpr( ++	struct sysinfo *ctrl, ++	const uint8_t chanmask, ++	const uint8_t lc, ++	const enum test_stop soe) ++{ ++	const struct reut_box reut_addr_ddr = { ++		.rank = { ++			.start    = 0, ++			.stop     = 0, ++			.inc_rate = 32, ++			.inc_val  = 1, ++		}, ++		.col = { ++			.start    = 0, ++			.stop     = 1023, ++			.inc_rate = 0, ++			.inc_val  = 1, ++		}, ++	}; ++	const struct reut_box reut_addr_lpddr = { ++		.bank = { ++			.start    = 4, ++			.stop     = 4, ++			.inc_rate = 0, ++			.inc_val  = 0, ++		}, ++	}; ++	const struct wdb_pat pattern = { ++		.start_ptr  = 0, ++		.stop_ptr   = 9, ++		.inc_rate   = 4, ++		.dq_pattern = BASIC_VA, ++	}; ++	setup_io_test( ++		ctrl, ++		chanmask, ++		PAT_RD, ++		128, ++		lc, ++		ctrl->lpddr ? 
&reut_addr_lpddr : &reut_addr_ddr, ++		soe, ++		&pattern, ++		0, ++		0); ++ ++	ctrl->dq_pat_lc = 1; ++	ctrl->dq_pat = BASIC_VA; ++} ++ ++uint8_t select_reut_ranks(struct sysinfo *ctrl, const uint8_t channel, uint8_t rankmask) ++{ ++	rankmask &= ctrl->rankmap[channel]; ++ ++	uint8_t rank_count = 0; ++	uint32_t rank_log_to_phys = 0; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (!rank_in_mask(rank, rankmask)) ++			continue; ++ ++		rank_log_to_phys |= rank << (4 * rank_count); ++		rank_count++; ++	} ++	mchbar_write32(REUT_ch_RANK_LOG_TO_PHYS(channel), rank_log_to_phys); ++ ++	union reut_seq_cfg_reg reut_seq_cfg = { ++		.raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)), ++	}; ++	if (!rank_count) { ++		reut_seq_cfg.global_control = 0; ++		mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw); ++		return 0; ++	} ++	union reut_seq_base_addr_reg reut_seq_addr_stop = { ++		.raw = mchbar_read64(REUT_ch_SEQ_ADDR_WRAP(channel)), ++	}; ++	reut_seq_addr_stop.rank_addr = rank_count - 1; ++	mchbar_write64(REUT_ch_SEQ_ADDR_WRAP(channel), reut_seq_addr_stop.raw); ++ ++	reut_seq_cfg.global_control = 1; ++	mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw); ++	return BIT(channel); ++} ++ ++void run_mpr_io_test(const bool clear_errors) ++{ ++	io_reset(); ++	mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) { ++		.start_test   = 1, ++		.clear_errors = clear_errors, ++	}.raw); ++	tick_delay(2); ++	io_reset(); ++	tick_delay(2); ++	mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) { ++		.stop_test = 1, ++	}.raw); ++} ++ ++static uint8_t get_num_tests(const uint8_t dq_pat) ++{ ++	switch (dq_pat) { ++	case SEGMENT_WDB:	return 4; ++	case CADB:		return 7; ++	case TURN_AROUND_WR:	return 8; ++	case TURN_AROUND_ODT:	return 4; ++	case RD_RD_TA:		return 2; ++	case RD_RD_TA_ALL:	return 8; ++	default:		return 1; ++	} ++} ++ ++uint8_t run_io_test( ++	struct sysinfo *const ctrl, ++	const uint8_t chanmask, ++	const uint8_t dq_pat, ++	const bool clear_errors) 
++{ ++	/* SEGMENT_WDB only runs 4 tests */ ++	const uint8_t segment_wdb_lc[4] = { 0, 0, 4, 2 }; ++	const union reut_pat_wdb_cl_ctrl_reg pat_wdb_cl[4] = { ++		[0] = { ++			.start_ptr =  0, ++			.end_ptr   =  9, ++			.inc_rate  = 25, ++			.inc_scale = SCALE_LINEAR, ++		}, ++		[1] = { ++			.start_ptr =  0, ++			.end_ptr   =  9, ++			.inc_rate  = 25, ++			.inc_scale = SCALE_LINEAR, ++		}, ++		[2] = { ++			.start_ptr = 10, ++			.end_ptr   = 63, ++			.inc_rate  = 19, ++			.inc_scale = SCALE_LINEAR, ++		}, ++		[3] = { ++			.start_ptr = 10, ++			.end_ptr   = 63, ++			.inc_rate  = 10, ++			.inc_scale = SCALE_LINEAR, ++		}, ++	}; ++	const bool is_turnaround = dq_pat == RD_RD_TA || dq_pat == RD_RD_TA_ALL; ++	const uint8_t num_tests = get_num_tests(dq_pat); ++	union tc_bank_rank_a_reg tc_bank_rank_a[NUM_CHANNELS] = { 0 }; ++	if (is_turnaround) { ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!(chanmask & BIT(channel))) ++				continue; ++ ++			tc_bank_rank_a[channel].raw = ctrl->tc_bankrank_a[channel].raw; ++		} ++	} ++	for (uint8_t t = 0; t < num_tests; t++) { ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!(chanmask & BIT(channel))) ++				continue; ++ ++			if (dq_pat == SEGMENT_WDB) { ++				mchbar_write32(REUT_ch_WDB_CL_CTRL(channel), pat_wdb_cl[t].raw); ++				/* ++				 * Skip programming LFSR save/restore. Too complex ++				 * unless power of 2. Program desired loopcount. 
++				 */ ++				const uint8_t pat_lc = ctrl->dq_pat_lc + segment_wdb_lc[t]; ++				program_loop_count(ctrl, channel, pat_lc); ++			} else if (dq_pat == CADB) { ++				setup_cadb(ctrl, channel, num_tests, t); ++			} else if (dq_pat == TURN_AROUND_WR || dq_pat == TURN_AROUND_ODT) { ++				union reut_seq_cfg_reg reut_seq_cfg = { ++					.raw = mchbar_read64(REUT_ch_SEQ_CFG(channel)), ++				}; ++				reut_seq_cfg.subsequence_start_pointer = t; ++				reut_seq_cfg.subsequence_end_pointer   = t; ++				mchbar_write64(REUT_ch_SEQ_CFG(channel), reut_seq_cfg.raw); ++				union reut_seq_addr_inc_ctl_reg addr_inc_ctl = { ++					.raw = mchbar_read64(REUT_ch_SEQ_ADDR_INC_CTL(channel)), ++				}; ++				uint8_t ta_inc_rate = 1; ++				if (dq_pat == TURN_AROUND_WR && (t == 0 || t == 7)) ++					ta_inc_rate = 0; ++				else if (dq_pat == TURN_AROUND_ODT && (t == 0 || t == 2)) ++					ta_inc_rate = 0; ++ ++				/* Program increment rate as linear value */ ++				addr_inc_ctl.rank_addr_update = BIT(7) | ta_inc_rate; ++				addr_inc_ctl.col_addr_update  = BIT(7) | ta_inc_rate; ++				mchbar_write64(REUT_ch_SEQ_ADDR_INC_CTL(channel), ++						addr_inc_ctl.raw); ++			} else if (dq_pat == RD_RD_TA) { ++				tc_bank_rank_a[channel].tRDRD_sr = (t == 0) ? 4 : 5; ++				mchbar_write32(TC_BANK_RANK_A_ch(channel), ++						tc_bank_rank_a[channel].raw); ++			} else if (dq_pat == RD_RD_TA_ALL) { ++				/* ++				 * Program tRDRD for SR and DR. 
Run 8 tests, covering ++				 * tRDRD_sr = 4, 5, 6, 7 and tRDRD_dr = min, +1, +2, +3 ++				 */ ++				const uint32_t tRDRD_dr = ctrl->tc_bankrank_a[channel].tRDRD_dr; ++				tc_bank_rank_a[channel].tRDRD_sr = (t % 4) + 4; ++				tc_bank_rank_a[channel].tRDRD_dr = (t % 4) + tRDRD_dr; ++				mchbar_write32(TC_BANK_RANK_A_ch(channel), ++						tc_bank_rank_a[channel].raw); ++ ++				/* Linear rank increment rate: 31 if t < 4, else 0 */ ++				union reut_seq_addr_inc_ctl_reg addr_inc_ctl = { ++					.raw = mchbar_read64(REUT_ch_SEQ_ADDR_INC_CTL(channel)), ++				}; ++				addr_inc_ctl.rank_addr_update = BIT(7) | ((t / 4) ? 0 : 31); ++				mchbar_write64(REUT_ch_SEQ_ADDR_INC_CTL(channel), ++						addr_inc_ctl.raw); ++			} ++		} ++		bool test_soe = false; ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!(chanmask & BIT(channel))) ++				continue; ++ ++			const union reut_err_ctl_reg reut_err_ctl = { ++				.raw = mchbar_read32(REUT_ch_ERR_CONTROL(channel)), ++			}; ++			const uint8_t soe = reut_err_ctl.stop_on_error_control; ++			if (soe != NSOE) { ++				test_soe = true; ++				break; ++			} ++		} ++		io_reset(); ++		mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) { ++			.start_test   = 1, ++			.clear_errors = clear_errors && t == 0, ++		}.raw); ++		struct mono_time prev, curr; ++		timer_monotonic_get(&prev); ++		union reut_global_err_reg global_err; ++		do { ++			global_err.raw = mchbar_read32(REUT_GLOBAL_ERR); ++			/** TODO: Clean up this mess **/ ++			timer_monotonic_get(&curr); ++			if (mono_time_diff_microseconds(&prev, &curr) > 1000 * 1000) { ++				mchbar_write32(REUT_GLOBAL_CTL, (union reut_seq_ctl_reg) { ++					.stop_test = 1, ++				}.raw); ++				printk(BIOS_ERR, "REUT timed out, ch_done: %x\n", ++					global_err.ch_test_done); ++				break; ++			} ++		} while ((global_err.ch_test_done & chanmask) != chanmask); ++		if (test_soe && global_err.ch_error & chanmask) ++			break; ++	} ++	if (is_turnaround) { ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; 
channel++) { ++			if (!(chanmask & BIT(channel))) ++				continue; ++ ++			mchbar_write32(TC_BANK_RANK_A_ch(channel), ++				ctrl->tc_bankrank_a[channel].raw); ++		} ++	} ++	return ((union reut_global_err_reg)mchbar_read32(REUT_GLOBAL_ERR)).ch_error; ++} +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index f8408e51a0..817a9f8bf8 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -94,20 +94,35 @@ + #define TC_BANK_RANK_D_ch(ch)			_MCMAIN_C(0x4014, ch) + #define SC_ROUNDT_LAT_ch(ch)			_MCMAIN_C(0x4024, ch) +  ++#define REUT_ch_PAT_WDB_CL_MUX_CFG(ch)		_MCMAIN_C(0x4040, ch) ++ + #define REUT_ch_PAT_WDB_CL_MUX_WR_x(ch, x)	_MCMAIN_C_X(0x4048, ch, x) /* x in 0 .. 2 */ + #define REUT_ch_PAT_WDB_CL_MUX_RD_x(ch, x)	_MCMAIN_C_X(0x4054, ch, x) /* x in 0 .. 2 */ +  + #define REUT_ch_PAT_WDB_CL_MUX_LMN(ch)		_MCMAIN_C(0x4078, ch) +  ++#define REUT_ch_PAT_WDB_INV(ch)			_MCMAIN_C(0x4084, ch) ++ ++#define REUT_ch_ERR_CONTROL(ch)			_MCMAIN_C(0x4098, ch) ++#define REUT_ch_ERR_ECC_MASK(ch)		_MCMAIN_C(0x409c, ch) ++ + #define SC_WR_ADD_DELAY_ch(ch)			_MCMAIN_C(0x40d0, ch) +  ++#define REUT_ch_ERR_DATA_MASK(ch)		_MCMAIN_C(0x40d8, ch) ++ + #define REUT_ch_MISC_CKE_CTRL(ch)		_MCMAIN_C(0x4190, ch) +  ++#define REUT_ch_MISC_PAT_CADB_CTRL(ch)		_MCMAIN_C(0x4198, ch) + #define REUT_ch_PAT_CADB_MRS(ch)		_MCMAIN_C(0x419c, ch) ++#define REUT_ch_PAT_CADB_MUX_CTRL(ch)		_MCMAIN_C(0x41a0, ch) ++#define REUT_ch_PAT_CADB_MUX_x(ch, x)		_MCMAIN_C_X(0x41a4, ch, x) /* x in 0 .. 
2 */ +  ++#define REUT_ch_PAT_CADB_CL_MUX_LMN(ch)		_MCMAIN_C(0x41b0, ch) + #define REUT_ch_PAT_CADB_WRITE_PTR(ch)		_MCMAIN_C(0x41bc, ch) + #define REUT_ch_PAT_CADB_PROG(ch)		_MCMAIN_C(0x41c0, ch) +  ++#define REUT_ch_WDB_CL_CTRL(ch)			_MCMAIN_C(0x4200, ch) ++ + #define TC_ZQCAL_ch(ch)				_MCMAIN_C(0x4290, ch) + #define TC_RFP_ch(ch)				_MCMAIN_C(0x4294, ch) + #define TC_RFTP_ch(ch)				_MCMAIN_C(0x4298, ch) +@@ -119,12 +134,27 @@ + #define QCLK_ch_LDAT_SDAT(ch)			_MCMAIN_C(0x42d4, ch) + #define QCLK_ch_LDAT_DATA_IN_x(ch, x)		_MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */ +  ++#define REUT_GLOBAL_CTL				0x4800 + #define REUT_GLOBAL_ERR				0x4804 +  ++#define REUT_ch_SUBSEQ_x_CTL(ch, x)		(0x4808 + 40 * (ch) + 4 * (x)) ++ + #define REUT_ch_SEQ_CFG(ch)			(0x48a8 + 8 * (ch)) +  + #define REUT_ch_SEQ_CTL(ch)			(0x48b8 + 4 * (ch)) +  ++#define REUT_ch_SEQ_ADDR_START(ch)		(0x48d8 + 8 * (ch)) ++ ++#define REUT_ch_SEQ_ADDR_WRAP(ch)		(0x48e8 + 8 * (ch)) ++ ++#define REUT_ch_SEQ_MISC_CTL(ch)		(0x4908 + 4 * (ch)) ++ ++#define REUT_ch_SEQ_ADDR_INC_CTL(ch)		(0x4910 + 8 * (ch)) ++ ++#define REUT_ch_RANK_LOG_TO_PHYS(ch)		(0x4930 + 4 * (ch)) /* 4 bits per rank */ ++ ++#define HSW_REUT_ch_SEQ_LOOP_COUNT(ch)		(0x4980 + 4 * (ch)) /* *** only on C0 *** */ ++ + /* MCMAIN broadcast */ + #define MCSCHEDS_CBIT		0x4c20 +  +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch b/config/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch new file mode 100644 index 00000000..f433b043 --- /dev/null +++ b/config/coreboot/haswell/patches/0019-haswell-NRI-Add-range-tracking-library.patch @@ -0,0 +1,222 @@ +From 9fba0468e75877cbda62f5eaeef1946d6489a8f9 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sun, 8 May 2022 00:56:00 +0200 +Subject: [PATCH 19/26] haswell NRI: Add range tracking library + +Implement a small library used to keep track of passing ranges. 
This +will be used by 1D training algorithms when margining some parameter. + +Change-Id: I8718e85165160afd7c0c8e730b5ce6c9c00f8a60 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../intel/haswell/native_raminit/ranges.c     | 109 ++++++++++++++++++ + .../intel/haswell/native_raminit/ranges.h     |  68 +++++++++++ + 3 files changed, 178 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/ranges.c + create mode 100644 src/northbridge/intel/haswell/native_raminit/ranges.h + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index 6e1b365602..2da950771d 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -9,6 +9,7 @@ romstage-y += io_comp_control.c + romstage-y += memory_map.c + romstage-y += raminit_main.c + romstage-y += raminit_native.c ++romstage-y += ranges.c + romstage-y += reut.c + romstage-y += setup_wdb.c + romstage-y += spd_bitmunching.c +diff --git a/src/northbridge/intel/haswell/native_raminit/ranges.c b/src/northbridge/intel/haswell/native_raminit/ranges.c +new file mode 100644 +index 0000000000..cdebc1fa66 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/ranges.c +@@ -0,0 +1,109 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <types.h> ++ ++#include "ranges.h" ++ ++void linear_record_pass( ++	struct linear_train_data *const data, ++	const bool pass, ++	const int32_t value, ++	const int32_t start, ++	const int32_t step) ++{ ++	/* If this is the first time, initialize all values */ ++	if (value == start) { ++		/* ++		 * If value passed, create a zero-length region for the current value, ++		 * which may be extended as long as the successive values are passing. ++		 * ++		 * Otherwise, create a zero-length range for the preceding value. 
This ++		 * range cannot be extended by other passing values, which is desired. ++		 */ ++		data->current.start = start - (pass ? 0 : step); ++		data->current.end   = data->current.start; ++		data->largest       = data->current; ++	} else if (pass) { ++		/* If this pass is not contiguous, it belongs to a new region */ ++		if (data->current.end != (value - step)) ++			data->current.start = value; ++ ++		/* Update end of current region */ ++		data->current.end = value; ++ ++		/* Update largest region */ ++		if (range_width(data->current) > range_width(data->largest)) ++			data->largest = data->current; ++	} ++} ++ ++void phase_record_pass( ++	struct phase_train_data *const data, ++	const bool pass, ++	const int32_t value, ++	const int32_t start, ++	const int32_t step) ++{ ++	/* If this is the first time, initialize all values */ ++	if (value == start) { ++		/* ++		 * If value passed, create a zero-length region for the current value, ++		 * which may be extended as long as the successive values are passing. ++		 * ++		 * Otherwise, create a zero-length range for the preceding value. This ++		 * range cannot be extended by other passing values, which is desired. ++		 */ ++		data->current.start = start - (pass ? 
0 : step); ++		data->current.end   = data->current.start; ++		data->largest       = data->current; ++		data->initial       = data->current; ++		return; ++	} ++	if (!pass) ++		return; ++ ++	/* Update initial region */ ++	if (data->initial.end == (value - step)) ++		data->initial.end = value; ++ ++	/* If this pass is not contiguous, it belongs to a new region */ ++	if (data->current.end != (value - step)) ++		data->current.start = value; ++ ++	/* Update end of current region */ ++	data->current.end = value; ++ ++	/* Update largest region */ ++	if (range_width(data->current) > range_width(data->largest)) ++		data->largest = data->current; ++} ++ ++void phase_append_initial_to_current( ++	struct phase_train_data *const data, ++	const int32_t start, ++	const int32_t step) ++{ ++	/* If initial region is valid and does not overlap, append it */ ++	if (data->initial.start == start && data->initial.end != data->current.end) ++		data->current.end += step + range_width(data->initial); ++ ++	/* Update largest region */ ++	if (range_width(data->current) > range_width(data->largest)) ++		data->largest = data->current; ++} ++ ++void phase_append_current_to_initial( ++	struct phase_train_data *const data, ++	const int32_t start, ++	const int32_t step) ++{ ++	/* If initial region is valid and does not overlap, append it */ ++	if (data->initial.start == start && data->initial.end != data->current.end) { ++		data->initial.start -= (step + range_width(data->current)); ++		data->current = data->initial; ++	} ++ ++	/* Update largest region */ ++	if (range_width(data->current) > range_width(data->largest)) ++		data->largest = data->current; ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/ranges.h b/src/northbridge/intel/haswell/native_raminit/ranges.h +new file mode 100644 +index 0000000000..235392df96 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/ranges.h +@@ -0,0 +1,68 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#ifndef 
HASWELL_RAMINIT_RANGES_H ++#define HASWELL_RAMINIT_RANGES_H ++ ++#include <types.h> ++ ++/* ++ * Many algorithms shmoo some parameter to determine the largest passing ++ * range. Provide a common implementation to avoid redundant boilerplate. ++ */ ++struct passing_range { ++	int32_t start; ++	int32_t end; ++}; ++ ++/* Structure for linear parameters, such as roundtrip delays */ ++struct linear_train_data { ++	struct passing_range current; ++	struct passing_range largest; ++}; ++ ++/* ++ * Phase ranges are "circular": the first and last indices are contiguous. ++ * To correctly determine the largest passing range, one has to combine ++ * the initial range and the current range when processing the last index. ++ */ ++struct phase_train_data { ++	struct passing_range initial; ++	struct passing_range current; ++	struct passing_range largest; ++}; ++ ++static inline int32_t range_width(const struct passing_range range) ++{ ++	return range.end - range.start; ++} ++ ++static inline int32_t range_center(const struct passing_range range) ++{ ++	return range.start + range_width(range) / 2; ++} ++ ++void linear_record_pass( ++	struct linear_train_data *data, ++	bool pass, ++	int32_t value, ++	int32_t start, ++	int32_t step); ++ ++void phase_record_pass( ++	struct phase_train_data *data, ++	bool pass, ++	int32_t value, ++	int32_t start, ++	int32_t step); ++ ++void phase_append_initial_to_current( ++	struct phase_train_data *data, ++	int32_t start, ++	int32_t step); ++ ++void phase_append_current_to_initial( ++	struct phase_train_data *data, ++	int32_t start, ++	int32_t step); ++ ++#endif +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch b/config/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch new file mode 100644 index 00000000..30926494 --- /dev/null +++ b/config/coreboot/haswell/patches/0020-haswell-NRI-Add-library-to-change-margins.patch @@ -0,0 +1,294 @@ +From 
54cfbe4cf53d16f747bfcfadd20445a0f5f1e5db Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sun, 8 May 2022 01:11:03 +0200 +Subject: [PATCH 20/26] haswell NRI: Add library to change margins + +Implement a library to change Rx/Tx margins. It will be expanded later. + +Change-Id: I0b55aba428d8b4d4e16d2fbdec57235ce3ce8adf +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/change_margin.c    | 154 ++++++++++++++++++ + .../haswell/native_raminit/raminit_native.h   |  50 ++++++ + .../intel/haswell/registers/mchbar.h          |   9 + + 4 files changed, 214 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/change_margin.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index 2da950771d..ebe9e9b762 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -1,5 +1,6 @@ + ## SPDX-License-Identifier: GPL-2.0-or-later +  ++romstage-y += change_margin.c + romstage-y += configure_mc.c + romstage-y += ddr3.c + romstage-y += jedec_reset.c +diff --git a/src/northbridge/intel/haswell/native_raminit/change_margin.c b/src/northbridge/intel/haswell/native_raminit/change_margin.c +new file mode 100644 +index 0000000000..12da59580f +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/change_margin.c +@@ -0,0 +1,154 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <commonlib/clamp.h> ++#include <console/console.h> ++#include <delay.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <timer.h> ++ ++#include "raminit_native.h" ++ ++void update_rxt( ++	struct sysinfo *ctrl, ++	const uint8_t channel, ++	const uint8_t rank, ++	const uint8_t byte, ++	const enum rxt_subfield subfield, ++	const int32_t value) ++{ ++	union ddr_data_rx_train_rank_reg rxt 
= { ++		.rcven =  ctrl->rcven[channel][rank][byte], ++		.dqs_p = ctrl->rxdqsp[channel][rank][byte], ++		.rx_eq =  ctrl->rx_eq[channel][rank][byte], ++		.dqs_n = ctrl->rxdqsn[channel][rank][byte], ++		.vref  = ctrl->rxvref[channel][rank][byte], ++	}; ++	int32_t new_value; ++	switch (subfield) { ++	case RXT_RCVEN: ++		new_value = clamp_s32(0, value, 511); ++		rxt.rcven = new_value; ++		break; ++	case RXT_RXDQS_P: ++		new_value = clamp_s32(0, value, 63); ++		rxt.dqs_p = new_value; ++		break; ++	case RXT_RX_EQ: ++		new_value = clamp_s32(0, value, 31); ++		rxt.rx_eq = new_value; ++		break; ++	case RXT_RXDQS_N: ++		new_value = clamp_s32(0, value, 63); ++		rxt.dqs_n = new_value; ++		break; ++	case RXT_RX_VREF: ++		new_value = clamp_s32(-32, value, 31); ++		rxt.vref = new_value; ++		break; ++	case RXT_RXDQS_BOTH: ++		new_value = clamp_s32(0, value, 63); ++		rxt.dqs_p = new_value; ++		rxt.dqs_n = new_value; ++		break; ++	case RXT_RESTORE: ++		new_value = value; ++		break; ++	default: ++		die("%s: Unhandled subfield index %u\n", __func__, subfield); ++	} ++ ++	if (new_value != value) { ++		printk(BIOS_ERR, "%s: Overflow for subfield %u: %d ---> %d\n", ++			__func__, subfield, value, new_value); ++	} ++	mchbar_write32(RX_TRAIN_ch_r_b(channel, rank, byte), rxt.raw); ++	download_regfile(ctrl, channel, false, rank, REG_FILE_USE_RANK, byte, true, false); ++} ++ ++void update_txt( ++	struct sysinfo *ctrl, ++	const uint8_t channel, ++	const uint8_t rank, ++	const uint8_t byte, ++	const enum txt_subfield subfield, ++	const int32_t value) ++{ ++	union ddr_data_tx_train_rank_reg txt = { ++		.dq_delay  = ctrl->tx_dq[channel][rank][byte], ++		.dqs_delay = ctrl->txdqs[channel][rank][byte], ++		.tx_eq     = ctrl->tx_eq[channel][rank][byte], ++	}; ++	int32_t new_value; ++	switch (subfield) { ++	case TXT_TX_DQ: ++		new_value = clamp_s32(0, value, 511); ++		txt.dq_delay = new_value; ++		break; ++	case TXT_TXDQS: ++		new_value = clamp_s32(0, value, 511); ++		txt.dqs_delay = new_value; ++		
break; ++	case TXT_TX_EQ: ++		new_value = clamp_s32(0, value, 63); ++		txt.tx_eq = new_value; ++		break; ++	case TXT_DQDQS_OFF: ++		new_value = value; ++		txt.dqs_delay += new_value; ++		txt.dq_delay  += new_value; ++		break; ++	case TXT_RESTORE: ++		new_value = value; ++		break; ++	default: ++		die("%s: Unhandled subfield index %u\n", __func__, subfield); ++	} ++	if (new_value != value) { ++		printk(BIOS_ERR, "%s: Overflow for subfield %u: %d ---> %d\n", ++			__func__, subfield, value, new_value); ++	} ++	mchbar_write32(TX_TRAIN_ch_r_b(channel, rank, byte), txt.raw); ++	download_regfile(ctrl, channel, false, rank, REG_FILE_USE_RANK, byte, false, true); ++} ++ ++void download_regfile( ++	struct sysinfo *ctrl, ++	const uint8_t channel, ++	const bool multicast, ++	const uint8_t rank, ++	const enum regfile_mode regfile, ++	const uint8_t byte, ++	const bool read_rf_rd, ++	const bool read_rf_wr) ++{ ++	union reut_seq_base_addr_reg reut_seq_base_addr; ++	switch (regfile) { ++	case REG_FILE_USE_START: ++		reut_seq_base_addr.raw = mchbar_read64(REUT_ch_SEQ_ADDR_START(channel)); ++		break; ++	case REG_FILE_USE_CURRENT: ++		reut_seq_base_addr.raw = mchbar_read64(REUT_ch_SEQ_ADDR_CURRENT(channel)); ++		break; ++	case REG_FILE_USE_RANK: ++		reut_seq_base_addr.raw = 0; ++		if (rank >= NUM_SLOTRANKS) ++			die("%s: bad rank %u\n", __func__, rank); ++		break; ++	default: ++		die("%s: Invalid regfile param %u\n", __func__, regfile); ++	} ++	uint8_t phys_rank = rank; ++	if (reut_seq_base_addr.raw != 0) { ++		/* Map REUT logical rank to physical rank */ ++		const uint32_t log_to_phys = mchbar_read32(REUT_ch_RANK_LOG_TO_PHYS(channel)); ++		phys_rank = log_to_phys >> (reut_seq_base_addr.rank_addr * 4) & 0x3; ++	} ++	uint32_t reg = multicast ? 
DDR_DATA_ch_CONTROL_0(channel) : DQ_CONTROL_0(channel, byte); ++	union ddr_data_control_0_reg ddr_data_control_0 = { ++		.raw = mchbar_read32(reg), ++	}; ++	ddr_data_control_0.read_rf_rd   = read_rf_rd; ++	ddr_data_control_0.read_rf_wr   = read_rf_wr; ++	ddr_data_control_0.read_rf_rank = phys_rank; ++	mchbar_write32(reg, ddr_data_control_0.raw); ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index 56df36ca8d..7c1a786780 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -117,6 +117,30 @@ enum test_stop { + 	ALSOE  = 3,	/* Stop on all lanes error */ + }; +  ++enum rxt_subfield { ++	RXT_RCVEN	= 0, ++	RXT_RXDQS_P	= 1, ++	RXT_RX_EQ	= 2, ++	RXT_RXDQS_N	= 3, ++	RXT_RX_VREF	= 4, ++	RXT_RXDQS_BOTH	= 5, ++	RXT_RESTORE	= 255, ++}; ++ ++enum txt_subfield { ++	TXT_TX_DQ	= 0, ++	TXT_TXDQS	= 1, ++	TXT_TX_EQ	= 2, ++	TXT_DQDQS_OFF	= 3, ++	TXT_RESTORE	= 255, ++}; ++ ++enum regfile_mode { ++	REG_FILE_USE_RANK,	/* Used when changing parameters for each rank */ ++	REG_FILE_USE_START,	/* Used when changing parameters before the test */ ++	REG_FILE_USE_CURRENT,	/* Used when changing parameters after the test */ ++}; ++ + struct wdb_pat { + 	uint32_t start_ptr;	/* Starting pointer in WDB */ + 	uint32_t stop_ptr;	/* Stopping pointer in WDB */ +@@ -452,6 +476,32 @@ uint8_t select_reut_ranks(struct sysinfo *ctrl, uint8_t channel, uint8_t rankmas + void run_mpr_io_test(bool clear_errors); + uint8_t run_io_test(struct sysinfo *ctrl, uint8_t chanmask, uint8_t dq_pat, bool clear_errors); +  ++void update_rxt( ++	struct sysinfo *ctrl, ++	uint8_t channel, ++	uint8_t rank, ++	uint8_t byte, ++	enum rxt_subfield subfield, ++	int32_t value); ++ ++void update_txt( ++	struct sysinfo *ctrl, ++	uint8_t channel, ++	uint8_t rank, ++	uint8_t byte, ++	enum txt_subfield subfield, ++	int32_t value); ++ ++void 
download_regfile( ++	struct sysinfo *ctrl, ++	uint8_t channel, ++	bool multicast, ++	uint8_t rank, ++	enum regfile_mode regfile, ++	uint8_t byte, ++	bool read_rf_rd, ++	bool read_rf_wr); ++ + uint8_t get_rx_bias(const struct sysinfo *ctrl); +  + uint8_t get_tCWL(uint32_t mem_clock_mhz); +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 817a9f8bf8..a81559bb1e 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -15,7 +15,11 @@ + /* Register definitions */ +  + /* DDR DATA per-channel per-bytelane */ ++#define RX_TRAIN_ch_r_b(ch, rank, byte)		_DDRIO_C_R_B(0x0000, ch, rank, byte) ++#define TX_TRAIN_ch_r_b(ch, rank, byte)		_DDRIO_C_R_B(0x0020, ch, rank, byte) ++ + #define DQ_CONTROL_2(ch, byte)			_DDRIO_C_R_B(0x0064, ch, 0, byte) ++#define DQ_CONTROL_0(ch, byte)			_DDRIO_C_R_B(0x0074, ch, 0, byte) +  + /* DDR CKE per-channel */ + #define DDR_CKE_ch_CMD_COMP_OFFSET(ch)		_DDRIO_C_R_B(0x1204, ch, 0, 0) +@@ -38,6 +42,9 @@ + #define DDR_SCRAMBLE_ch(ch)			(0x2000 + 4 * (ch)) + #define DDR_SCRAM_MISC_CONTROL			0x2008 +  ++/* DDR DATA per-channel multicast */ ++#define DDR_DATA_ch_CONTROL_0(ch)		_DDRIO_C_R_B(0x3074, ch, 0, 0) ++ + /* DDR CMDN/CMDS per-channel (writes go to both CMDN and CMDS fubs) */ + #define DDR_CMD_ch_COMP_OFFSET(ch)		_DDRIO_C_R_B(0x3204, ch, 0, 0) + #define DDR_CMD_ch_PI_CODING(ch)		_DDRIO_C_R_B(0x3208, ch, 0, 0) +@@ -147,6 +154,8 @@ +  + #define REUT_ch_SEQ_ADDR_WRAP(ch)		(0x48e8 + 8 * (ch)) +  ++#define REUT_ch_SEQ_ADDR_CURRENT(ch)		(0x48f8 + 8 * (ch)) ++ + #define REUT_ch_SEQ_MISC_CTL(ch)		(0x4908 + 4 * (ch)) +  + #define REUT_ch_SEQ_ADDR_INC_CTL(ch)		(0x4910 + 8 * (ch)) +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch b/config/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch new file mode 100644 index 00000000..9139a67e --- /dev/null +++ 
b/config/coreboot/haswell/patches/0021-haswell-NRI-Add-RcvEn-training.patch @@ -0,0 +1,708 @@ +From ac8843553af34855d0331554c03280e66c4ea582 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sun, 8 May 2022 00:05:41 +0200 +Subject: [PATCH 21/26] haswell NRI: Add RcvEn training + +Implement the RcvEn (Receive Enable) calibration procedure. + +Change-Id: Ifbfa520f3e0486c56d0988ce67af2ddb9cf29888 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/raminit_main.c     |   1 + + .../haswell/native_raminit/raminit_native.h   |  14 + + .../haswell/native_raminit/reg_structs.h      |  13 + + .../native_raminit/train_receive_enable.c     | 561 ++++++++++++++++++ + .../intel/haswell/registers/mchbar.h          |   3 + + 6 files changed, 593 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/train_receive_enable.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index ebe9e9b762..e2fbfb4211 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -16,3 +16,4 @@ romstage-y += setup_wdb.c + romstage-y += spd_bitmunching.c + romstage-y += testing_io.c + romstage-y += timings_refresh.c ++romstage-y += train_receive_enable.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 5e4674957d..7d444659c3 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -60,6 +60,7 @@ static const struct task_entry cold_boot[] = { + 	{ configure_memory_map,                                   true, "MEMMAP",     }, + 	{ do_jedec_init,                                          true, "JEDECINIT",  }, + 	{ pre_training,       
                                    true, "PRETRAIN",   }, ++	{ train_receive_enable,                                   true, "RCVET",      }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index 7c1a786780..a36ebfacd1 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -42,6 +42,9 @@ + #define NUM_WDB_CL_MUX_SEEDS	3 + #define NUM_CADB_MUX_SEEDS	3 +  ++/* Specified in PI ticks. 64 PI ticks == 1 qclk */ ++#define tDQSCK_DRIFT		64 ++ + /* ZQ calibration types */ + enum { + 	ZQ_INIT,	/* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init  with tZQinit  */ +@@ -188,6 +191,7 @@ enum raminit_status { + 	RAMINIT_STATUS_MPLL_INIT_FAILURE, + 	RAMINIT_STATUS_POLL_TIMEOUT, + 	RAMINIT_STATUS_REUT_ERROR, ++	RAMINIT_STATUS_RCVEN_FAILURE, + 	RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/ + }; +  +@@ -270,6 +274,10 @@ struct sysinfo { +  + 	union ddr_data_vref_adjust_reg dimm_vref; +  ++	uint8_t io_latency[NUM_CHANNELS][NUM_SLOTRANKS]; ++	uint8_t rt_latency[NUM_CHANNELS][NUM_SLOTRANKS]; ++	uint32_t rt_io_comp[NUM_CHANNELS]; ++ + 	uint32_t data_offset_train[NUM_CHANNELS][NUM_LANES]; + 	uint32_t data_offset_comp[NUM_CHANNELS][NUM_LANES]; +  +@@ -344,6 +352,11 @@ static inline void clear_data_offset_train_all(struct sysinfo *ctrl) + 	memset(ctrl->data_offset_train, 0, sizeof(ctrl->data_offset_train)); + } +  ++static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint8_t byte) ++{ ++	return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte)); ++} ++ + /* Number of ticks to wait in units of 69.841279 ns (citation needed) */ + static inline void tick_delay(const uint32_t delay) + { +@@ -401,6 +414,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl); + enum 
raminit_status configure_mc(struct sysinfo *ctrl); + enum raminit_status configure_memory_map(struct sysinfo *ctrl); + enum raminit_status do_jedec_init(struct sysinfo *ctrl); ++enum raminit_status train_receive_enable(struct sysinfo *ctrl); +  + void configure_timings(struct sysinfo *ctrl); + void configure_refresh(struct sysinfo *ctrl); +diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +index b943259b91..b099f4bb82 100644 +--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h ++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +@@ -297,6 +297,19 @@ union ddr_scram_misc_control_reg { + 	uint32_t raw; + }; +  ++union sc_io_latency_reg { ++	struct __packed { ++		uint32_t iolat_rank0     : 4; // Bits  3:0 ++		uint32_t iolat_rank1     : 4; // Bits  7:4 ++		uint32_t iolat_rank2     : 4; // Bits 11:8 ++		uint32_t iolat_rank3     : 4; // Bits 15:12 ++		uint32_t rt_iocomp       : 6; // Bits 21:16 ++		uint32_t                 : 9; // Bits 30:22 ++		uint32_t dis_rt_clk_gate : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ + union mcscheds_cbit_reg { + 	struct __packed { + 		uint32_t dis_opp_cas    : 1; // Bits  0:0 +diff --git a/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c +new file mode 100644 +index 0000000000..576c6bc21e +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/train_receive_enable.c +@@ -0,0 +1,561 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <console/console.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++#include "ranges.h" ++ ++#define RCVEN_PLOT	RAM_DEBUG ++ ++static enum raminit_status change_rcven_timing(struct sysinfo *ctrl, const uint8_t channel) ++{ ++	int16_t max_rcven = -4096; ++	int16_t min_rcven = 4096; ++	int16_t max_rcven_rank[NUM_SLOTRANKS]; ++	
int16_t min_rcven_rank[NUM_SLOTRANKS]; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		max_rcven_rank[rank] = max_rcven; ++		min_rcven_rank[rank] = min_rcven; ++	} ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (!rank_in_ch(ctrl, rank, channel)) ++			continue; ++ ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++			int16_t new_rcven = ctrl->rcven[channel][rank][byte]; ++			new_rcven -= ctrl->io_latency[channel][rank] * 64; ++			if (max_rcven_rank[rank] < new_rcven) ++				max_rcven_rank[rank] = new_rcven; ++ ++			if (min_rcven_rank[rank] > new_rcven) ++				min_rcven_rank[rank] = new_rcven; ++		} ++		if (max_rcven < max_rcven_rank[rank]) ++			max_rcven = max_rcven_rank[rank]; ++ ++		if (min_rcven > min_rcven_rank[rank]) ++			min_rcven = min_rcven_rank[rank]; ++	} ++ ++	/* ++	 * Determine how far we are from the ideal center point for RcvEn timing. ++	 * (PiIdeal - AveRcvEn) / 64 is the ideal number of cycles we should have ++	 * for IO latency. command training will reduce this by 64, so plan for ++	 * that now in the ideal value. Round to closest integer. 
++	 */ ++	const int16_t rre_pi_ideal = 256 + 64; ++	const int16_t pi_reserve = 64; ++	const int16_t rcven_center = (max_rcven + min_rcven) / 2; ++	const int8_t iolat_target = DIV_ROUND_CLOSEST(rre_pi_ideal - rcven_center, 64); ++ ++	int8_t io_g_offset = 0; ++	int8_t io_lat[NUM_SLOTRANKS] = { 0 }; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (!rank_in_ch(ctrl, rank, channel)) ++			continue; ++ ++		io_lat[rank] = iolat_target; ++ ++		/* Check for RcvEn underflow/overflow */ ++		const int16_t rcven_lower = 64 * io_lat[rank] + min_rcven_rank[rank]; ++		if (rcven_lower < pi_reserve) ++			io_lat[rank] += DIV_ROUND_UP(pi_reserve - rcven_lower, 64); ++ ++		const int16_t rcven_upper = 64 * io_lat[rank] + max_rcven_rank[rank]; ++		if (rcven_upper > 511 - pi_reserve) ++			io_lat[rank] -= DIV_ROUND_UP(rcven_upper - (511 - pi_reserve), 64); ++ ++		/* Check for IO latency over/underflow */ ++		if (io_lat[rank] - io_g_offset > 14) ++			io_g_offset = io_lat[rank] - 14; ++ ++		if (io_lat[rank] - io_g_offset < 1) ++			io_g_offset = io_lat[rank] - 1; ++ ++		const int8_t cycle_offset = io_lat[rank] - ctrl->io_latency[channel][rank]; ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++			ctrl->rcven[channel][rank][byte] += 64 * cycle_offset; ++			update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0); ++		} ++	} ++ ++	/* Calculate new IO comp latency */ ++	union sc_io_latency_reg sc_io_lat = { ++		.raw = mchbar_read32(SC_IO_LATENCY_ch(channel)), ++	}; ++ ++	/* Check if we are underflowing or overflowing this field */ ++	if (io_g_offset < 0 && sc_io_lat.rt_iocomp < -io_g_offset) { ++		printk(BIOS_ERR, "%s: IO COMP underflow\n", __func__); ++		printk(BIOS_ERR, "io_g_offset: %d\n", io_g_offset); ++		printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp); ++		return RAMINIT_STATUS_RCVEN_FAILURE; ++	} ++	if (io_g_offset > 0 && io_g_offset > 0x3f - sc_io_lat.rt_iocomp) { ++		printk(BIOS_ERR, "%s: IO COMP overflow\n", __func__); ++		printk(BIOS_ERR, "io_g_offset: 
%d\n", io_g_offset); ++		printk(BIOS_ERR, "rt_iocomp: %u\n", sc_io_lat.rt_iocomp); ++		return RAMINIT_STATUS_RCVEN_FAILURE; ++	} ++	sc_io_lat.rt_iocomp += io_g_offset; ++	ctrl->rt_io_comp[channel] = sc_io_lat.rt_iocomp; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (ctrl->rankmap[channel] & BIT(rank)) ++			ctrl->io_latency[channel][rank] = io_lat[rank] - io_g_offset; ++ ++		const uint8_t shift = rank * 4; ++		sc_io_lat.raw &= ~(0xf << shift); ++		sc_io_lat.raw |= ctrl->io_latency[channel][rank] << shift; ++	} ++	mchbar_write32(SC_IO_LATENCY_ch(channel), sc_io_lat.raw); ++	return RAMINIT_STATUS_SUCCESS; ++} ++ ++#define RL_START (256 + 24) ++#define RL_STOP  (384 + 24) ++#define RL_STEP  8 ++ ++#define RE_NUM_SAMPLES	6 ++ ++static enum raminit_status verify_high_region(const int32_t center, const int32_t lwidth) ++{ ++	if (center > RL_STOP) { ++		/* Check if center of high was found where it should be */ ++		printk(BIOS_ERR, "RcvEn: Center of high (%d) higher than expected\n", center); ++		return RAMINIT_STATUS_RCVEN_FAILURE; ++	} ++	if (lwidth <= 32) { ++		/* Check if width is large enough */ ++		printk(BIOS_ERR, "RcvEn: Width of high region (%d) too small\n", lwidth); ++		return RAMINIT_STATUS_RCVEN_FAILURE; ++	} ++	if (lwidth >= 96) { ++		/* Since we're calibrating a phase, a too large region is a problem */ ++		printk(BIOS_ERR, "RcvEn: Width of high region (%d) too large\n", lwidth); ++		return RAMINIT_STATUS_RCVEN_FAILURE; ++	} ++	return RAMINIT_STATUS_SUCCESS; ++} ++ ++static void program_io_latency(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank) ++{ ++	const uint8_t shift = rank * 4; ++	const uint8_t iolat = ctrl->io_latency[channel][rank]; ++	mchbar_clrsetbits32(SC_IO_LATENCY_ch(channel), 0xf << shift, iolat << shift); ++} ++ ++static void program_rl_delays(struct sysinfo *ctrl, const uint8_t rank, const uint16_t rl_delay) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!rank_in_ch(ctrl, rank, 
channel)) ++			continue; ++ ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++			update_rxt(ctrl, channel, rank, byte, RXT_RCVEN, rl_delay); ++	} ++} ++ ++static bool sample_dqs(const uint8_t channel, const uint8_t byte) ++{ ++	return (get_data_train_feedback(channel, byte) & 0x1ff) >= BIT(RE_NUM_SAMPLES - 1); ++} ++ ++enum raminit_status train_receive_enable(struct sysinfo *ctrl) ++{ ++	const struct reut_box reut_addr = { ++		.col = { ++			.start    = 0, ++			.stop     = 1023, ++			.inc_rate = 0, ++			.inc_val  = 1, ++		}, ++	}; ++	const struct wdb_pat wdb_pattern = { ++		.start_ptr  = 0, ++		.stop_ptr   = 9, ++		.inc_rate   = 32, ++		.dq_pattern = BASIC_VA, ++	}; ++ ++	const uint16_t bytemask = BIT(ctrl->lanes) - 1; ++	const uint8_t fine_step = 1; ++ ++	const uint8_t rt_delta = is_hsw_ult() ? 4 : 2; ++	const uint8_t rt_io_comp = 21 + rt_delta; ++	const uint8_t rt_latency = 16 + rt_delta; ++	setup_io_test( ++		ctrl, ++		ctrl->chanmap, ++		PAT_RD, ++		2, ++		RE_NUM_SAMPLES + 1, ++		&reut_addr, ++		0, ++		&wdb_pattern, ++		0, ++		8); ++ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++			union ddr_data_control_2_reg data_control_2 = { ++				.raw = ctrl->dq_control_2[channel][byte], ++			}; ++			data_control_2.force_rx_on = 1; ++			mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw); ++		} ++		union ddr_data_control_0_reg data_control_0 = { ++			.raw = ctrl->dq_control_0[channel], ++		}; ++		if (ctrl->lpddr) { ++			/** ++			 * W/A for b4618574 - @todo: remove for HSW ULT C0 ++			 * Can't have force_odt_on together with leaker, disable LPDDR ++			 * mode during this training step. lpddr_mode is restored ++			 * at the end of this function from the host structure. 
++			 */ ++			data_control_0.lpddr_mode = 0; ++			mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++		} ++		data_control_0.force_odt_on     = 1; ++		data_control_0.rl_training_mode = 1; ++		mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++		mchbar_write32(SC_IO_LATENCY_ch(channel), (union sc_io_latency_reg) { ++			.rt_iocomp = rt_io_comp, ++		}.raw); ++	} ++	enum raminit_status status = RAMINIT_STATUS_SUCCESS; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (!does_rank_exist(ctrl, rank)) ++			continue; ++ ++		/* ++		 * Set initial roundtrip latency values. Assume -4 QCLK for worst board ++		 * layout. This is calculated as HW_ROUNDT_LAT_DEFAULT_VALUE plus: ++		 * ++		 *   DDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + N-mode value * 2 ++		 * LPDDR3: Default + (2 * tAA) + 4 QCLK + PI_CLK + tDQSCK_max ++		 * ++		 * N-mode is 3 during training mode. Both channels use the same timings. ++		 */ ++		/** TODO: differs for LPDDR **/ ++		const uint32_t tmp = MAX(ctrl->multiplier, 4) + 5 + 2 * ctrl->tAA; ++		const uint32_t initial_rt_latency = MIN(rt_latency + tmp, 0x3f); ++ ++		uint8_t chanmask = 0; ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			chanmask |= select_reut_ranks(ctrl, channel, BIT(rank)); ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			ctrl->io_latency[channel][rank] = 0; ++			mchbar_write8(SC_ROUNDT_LAT_ch(channel) + rank, initial_rt_latency); ++			ctrl->rt_latency[channel][rank] = initial_rt_latency; ++		} ++ ++		printk(BIOS_DEBUG, "Rank %u\n", rank); ++		printk(BIOS_DEBUG, "Steps 1 and 2: Find middle of high region\n"); ++		printk(RCVEN_PLOT, "Byte"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(RCVEN_PLOT, "\t"); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++				printk(RCVEN_PLOT, "%u ", byte); ++		} ++		printk(RCVEN_PLOT, "\nRcvEn\n"); ++		struct 
phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 }; ++		for (uint16_t rl_delay = RL_START; rl_delay < RL_STOP; rl_delay += RL_STEP) { ++			printk(RCVEN_PLOT, " % 3d", rl_delay); ++			program_rl_delays(ctrl, rank, rl_delay); ++			run_io_test(ctrl, chanmask, BASIC_VA, true); ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				printk(RCVEN_PLOT, "\t"); ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					const bool high = sample_dqs(channel, byte); ++					printk(RCVEN_PLOT, high ? ". " : "# "); ++					phase_record_pass( ++						&region_data[channel][byte], ++						high, ++						rl_delay, ++						RL_START, ++						RL_STEP); ++				} ++			} ++			printk(RCVEN_PLOT, "\n"); ++		} ++		printk(RCVEN_PLOT, "\n"); ++		printk(BIOS_DEBUG, "Update RcvEn timing to be in the center of high region\n"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\n", ++				channel, rank); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				struct phase_train_data *const curr_data = ++						&region_data[channel][byte]; ++				phase_append_current_to_initial(curr_data, RL_START, RL_STEP); ++				const int32_t lwidth = range_width(curr_data->largest); ++				const int32_t center = range_center(curr_data->largest); ++				printk(BIOS_DEBUG, "   B%u: \t%d\t%d\t%d\t%d\n", ++					byte, ++					curr_data->largest.start, ++					curr_data->largest.end, ++					lwidth, ++					center); ++ ++				status = verify_high_region(center, lwidth); ++				if (status) { ++					printk(BIOS_ERR, ++						"RcvEn problems on channel %u, byte %u\n", ++						channel, byte); ++					goto clean_up; ++				} ++				ctrl->rcven[channel][rank][byte] = center; ++				update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0); ++			} ++			printk(BIOS_DEBUG, "\n"); ++		} ++ ++		printk(BIOS_DEBUG, 
"Step 3: Quarter preamble - Walk backwards\n"); ++		printk(RCVEN_PLOT, "Byte"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(RCVEN_PLOT, "\t"); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++				printk(RCVEN_PLOT, "%u ", byte); ++		} ++		printk(RCVEN_PLOT, "\nIOLAT\n"); ++		bool done = false; ++		while (!done) { ++			run_io_test(ctrl, chanmask, BASIC_VA, true); ++			done = true; ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				printk(RCVEN_PLOT, "  %2u\t", ctrl->io_latency[channel][rank]); ++				uint16_t highs = 0; ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					const bool high = sample_dqs(channel, byte); ++					printk(RCVEN_PLOT, high ? "H " : "L "); ++					if (high) ++						highs |= BIT(byte); ++				} ++				if (!highs) ++					continue; ++ ++				done = false; ++ ++				/* If all bytes sample high, adjust timing globally */ ++				if (highs == bytemask && ctrl->io_latency[channel][rank] < 14) { ++					ctrl->io_latency[channel][rank] += 2; ++					ctrl->io_latency[channel][rank] %= 16; ++					program_io_latency(ctrl, channel, rank); ++					continue; ++				} ++ ++				/* Otherwise, adjust individual bytes */ ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					if (!(highs & BIT(byte))) ++						continue; ++ ++					if (ctrl->rcven[channel][rank][byte] < 128) { ++						printk(BIOS_ERR, ++							"RcvEn underflow: walking backwards\n"); ++						printk(BIOS_ERR, ++							"For channel %u, rank %u, byte %u\n", ++							channel, rank, byte); ++						status = RAMINIT_STATUS_RCVEN_FAILURE; ++						goto clean_up; ++					} ++					ctrl->rcven[channel][rank][byte] -= 128; ++					update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0); ++				} ++			} ++			printk(RCVEN_PLOT, "\n"); ++		} ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if 
(!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(BIOS_DEBUG, "\nC%u:  Preamble\n", channel); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				printk(BIOS_DEBUG, ++					" B%u: %u\n", byte, ctrl->rcven[channel][rank][byte]); ++			} ++		} ++		printk(BIOS_DEBUG, "\n"); ++ ++		printk(BIOS_DEBUG, "Step 4: Add 1 qclk\n"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				ctrl->rcven[channel][rank][byte] += 64; ++				update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0); ++			} ++		} ++		printk(BIOS_DEBUG, "\n"); ++ ++		printk(BIOS_DEBUG, "Step 5: Walk forward to find rising edge\n"); ++		printk(RCVEN_PLOT, "Byte"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(RCVEN_PLOT, "\t"); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++				printk(RCVEN_PLOT, "%u ", byte); ++		} ++		printk(RCVEN_PLOT, "\n inc\n"); ++		uint16_t ch_result[NUM_CHANNELS] = { 0 }; ++		uint8_t inc_preamble[NUM_CHANNELS][NUM_LANES] = { 0 }; ++		for (uint8_t inc = 0; inc < 64; inc += fine_step) { ++			printk(RCVEN_PLOT, " %2u\t", inc); ++			run_io_test(ctrl, chanmask, BASIC_VA, true); ++			done = true; ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					if (ch_result[channel] & BIT(byte)) { ++						/* Skip bytes that are already done */ ++						printk(RCVEN_PLOT, ". "); ++						continue; ++					} ++					const bool pass = sample_dqs(channel, byte); ++					printk(RCVEN_PLOT, pass ? ". 
" : "# "); ++					if (pass) { ++						ch_result[channel] |= BIT(byte); ++						continue; ++					} ++					ctrl->rcven[channel][rank][byte] += fine_step; ++					update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0); ++					inc_preamble[channel][byte] = inc; ++				} ++				printk(RCVEN_PLOT, "\t"); ++				if (ch_result[channel] != bytemask) ++					done = false; ++			} ++			printk(RCVEN_PLOT, "\n"); ++			if (done) ++				break; ++		} ++		printk(BIOS_DEBUG, "\n"); ++		if (!done) { ++			printk(BIOS_ERR, "Error: Preamble edge not found for all bytes\n"); ++			printk(BIOS_ERR, "The final RcvEn results are as follows:\n"); ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				printk(BIOS_ERR, "Channel %u Rank %u:  preamble\n", ++					channel, rank); ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					printk(BIOS_ERR, " Byte %u: %u%s\n", byte, ++						ctrl->rcven[channel][rank][byte], ++						(ch_result[channel] ^ bytemask) & BIT(byte) ++							? "" ++							: " *** Check this byte! ***"); ++				} ++			} ++			status = RAMINIT_STATUS_RCVEN_FAILURE; ++			goto clean_up; ++		} ++ ++		printk(BIOS_DEBUG, "Step 6: center on preamble and clean up rank\n"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(BIOS_DEBUG, "C%u:  Preamble increment\n", channel); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				/* ++				 * For Traditional, pull in RcvEn by 64. For ULT, take the DQS ++				 * drift into account to the specified guardband: tDQSCK_DRIFT. 
++				 */ ++				ctrl->rcven[channel][rank][byte] -= tDQSCK_DRIFT; ++				update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0); ++				printk(BIOS_DEBUG, " B%u: %u      %u\n", byte, ++					ctrl->rcven[channel][rank][byte], ++					inc_preamble[channel][byte]); ++			} ++			printk(BIOS_DEBUG, "\n"); ++		} ++		printk(BIOS_DEBUG, "\n"); ++	} ++ ++clean_up: ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		if (ctrl->lpddr) { ++			/** ++			 * W/A for b4618574 - @todo: remove for HSW ULT C0 ++			 * Can't have force_odt_on together with leaker, disable LPDDR mode for ++			 * this training step. This write will disable force_odt_on while still ++			 * keeping LPDDR mode disabled. Second write will restore LPDDR mode. ++			 */ ++			union ddr_data_control_0_reg data_control_0 = { ++				.raw = ctrl->dq_control_0[channel], ++			}; ++			data_control_0.lpddr_mode = 0; ++			mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++		} ++		mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]); ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++			mchbar_write32(DQ_CONTROL_2(channel, byte), ++					ctrl->dq_control_2[channel][byte]); ++		} ++	} ++	io_reset(); ++	if (status) ++		return status; ++ ++	printk(BIOS_DEBUG, "Step 7: Sync IO latency across all ranks\n"); ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		status = change_rcven_timing(ctrl, channel); ++		if (status) ++			return status; ++	} ++	printk(BIOS_DEBUG, "\nFinal Receive Enable and IO latency settings:\n"); ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			const union sc_io_latency_reg sc_io_latency = { ++				.raw = 
mchbar_read32(SC_IO_LATENCY_ch(channel)), ++			}; ++			printk(BIOS_DEBUG, "  C%u.R%u: IOLAT = %u  rt_iocomp = %u\n", channel, ++				rank, ctrl->io_latency[channel][rank], sc_io_latency.rt_iocomp); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				printk(BIOS_DEBUG, "   B%u:   %u\n", byte, ++					ctrl->rcven[channel][rank][byte]); ++			} ++			printk(BIOS_DEBUG, "\n"); ++		} ++	} ++	return status; ++} +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index a81559bb1e..9172d4f2b0 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -18,6 +18,8 @@ + #define RX_TRAIN_ch_r_b(ch, rank, byte)		_DDRIO_C_R_B(0x0000, ch, rank, byte) + #define TX_TRAIN_ch_r_b(ch, rank, byte)		_DDRIO_C_R_B(0x0020, ch, rank, byte) +  ++#define DDR_DATA_TRAIN_FEEDBACK(ch, byte)	_DDRIO_C_R_B(0x0054, ch, 0, byte) ++ + #define DQ_CONTROL_2(ch, byte)			_DDRIO_C_R_B(0x0064, ch, 0, byte) + #define DQ_CONTROL_0(ch, byte)			_DDRIO_C_R_B(0x0074, ch, 0, byte) +  +@@ -100,6 +102,7 @@ + #define COMMAND_RATE_LIMIT_ch(ch)		_MCMAIN_C(0x4010, ch) + #define TC_BANK_RANK_D_ch(ch)			_MCMAIN_C(0x4014, ch) + #define SC_ROUNDT_LAT_ch(ch)			_MCMAIN_C(0x4024, ch) ++#define SC_IO_LATENCY_ch(ch)			_MCMAIN_C(0x4028, ch) +  + #define REUT_ch_PAT_WDB_CL_MUX_CFG(ch)		_MCMAIN_C(0x4040, ch) +  +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch b/config/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch new file mode 100644 index 00000000..2e6de17c --- /dev/null +++ b/config/coreboot/haswell/patches/0022-haswell-NRI-Add-function-to-change-margins.patch @@ -0,0 +1,272 @@ +From 8c3874195c0fc1af9d0b84611496689da1c19d8c Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sun, 8 May 2022 11:58:59 +0200 +Subject: [PATCH 22/26] haswell NRI: Add function to change margins + 
+Implement a function to change margin parameters. Haswell provides a +register to apply an offset to margin parameters during training, so +make use of it. There are other margin parameters that have not been +implemented yet, as they are not needed for now and special handling +is needed to provide offset training functionality. + +Change-Id: I5392380e13de3c44e77b7bc9f3b819e2661d1e2d +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../haswell/native_raminit/change_margin.c    | 136 ++++++++++++++++++ + .../haswell/native_raminit/raminit_native.h   |  39 +++++ + .../haswell/native_raminit/reg_structs.h      |  12 ++ + .../intel/haswell/registers/mchbar.h          |   1 + + 4 files changed, 188 insertions(+) + +diff --git a/src/northbridge/intel/haswell/native_raminit/change_margin.c b/src/northbridge/intel/haswell/native_raminit/change_margin.c +index 12da59580f..4ba9cfa5c6 100644 +--- a/src/northbridge/intel/haswell/native_raminit/change_margin.c ++++ b/src/northbridge/intel/haswell/native_raminit/change_margin.c +@@ -1,5 +1,6 @@ + /* SPDX-License-Identifier: GPL-2.0-or-later */ +  ++#include <assert.h> + #include <commonlib/clamp.h> + #include <console/console.h> + #include <delay.h> +@@ -152,3 +153,138 @@ void download_regfile( + 	ddr_data_control_0.read_rf_rank = phys_rank; + 	mchbar_write32(reg, ddr_data_control_0.raw); + } ++ ++static void update_data_offset_train( ++	struct sysinfo *ctrl, ++	const uint8_t param, ++	const uint8_t en_multicast, ++	const uint8_t channel_in, ++	const uint8_t rank, ++	const uint8_t byte_in, ++	const bool update_ctrl, ++	const enum regfile_mode regfile, ++	const uint32_t value) ++{ ++	bool is_rd = false; ++	bool is_wr = false; ++	switch (param) { ++	case RdT: ++	case RdV: ++	case RcvEna: ++		is_rd = true; ++		break; ++	case WrT: ++	case WrDqsT: ++		is_wr = true; ++		break; ++	default: ++		die("%s: Invalid margin parameter %u\n", __func__, param); ++	} ++	if (en_multicast) { ++		mchbar_write32(DDR_DATA_OFFSET_TRAIN, 
value); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!does_ch_exist(ctrl, channel)) ++				continue; ++ ++			download_regfile(ctrl, channel, true, rank, regfile, 0, is_rd, is_wr); ++			if (update_ctrl) { ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++					ctrl->data_offset_train[channel][byte] = value; ++			} ++		} ++	} else { ++		mchbar_write32(DDR_DATA_OFFSET_TRAIN_ch_b(channel_in, byte_in), value); ++		download_regfile(ctrl, channel_in, false, rank, regfile, byte_in, is_rd, is_wr); ++		if (update_ctrl) ++			ctrl->data_offset_train[channel_in][byte_in] = value; ++	} ++} ++ ++static uint32_t get_max_margin(const enum margin_parameter param) ++{ ++	switch (param) { ++	case RcvEna: ++	case RdT: ++	case WrT: ++	case WrDqsT: ++		return MAX_POSSIBLE_TIME; ++	case RdV: ++		return MAX_POSSIBLE_VREF; ++	default: ++		die("%s: Invalid margin parameter %u\n", __func__, param); ++	} ++} ++ ++void change_margin( ++	struct sysinfo *ctrl, ++	const enum margin_parameter param, ++	const int32_t value0, ++	const bool en_multicast, ++	const uint8_t channel, ++	const uint8_t rank, ++	const uint8_t byte, ++	const bool update_ctrl, ++	const enum regfile_mode regfile) ++{ ++	/** FIXME: Remove this **/ ++	if (rank == 0xff) ++		die("%s: rank is 0xff\n", __func__); ++ ++	if (!en_multicast && !does_ch_exist(ctrl, channel)) ++		die("%s: Tried to change margin of empty channel %u\n", __func__, channel); ++ ++	const uint32_t max_value = get_max_margin(param); ++	const int32_t v0 = clamp_s32(-max_value, value0, max_value); ++ ++	union ddr_data_offset_train_reg ddr_data_offset_train = { ++		.raw = en_multicast ? 
0 : ctrl->data_offset_train[channel][byte], ++	}; ++	bool update_offset_train = false; ++	switch (param) { ++	case RcvEna: ++		ddr_data_offset_train.rcven = v0; ++		update_offset_train = true; ++		break; ++	case RdT: ++		ddr_data_offset_train.rx_dqs = v0; ++		update_offset_train = true; ++		break; ++	case WrT: ++		ddr_data_offset_train.tx_dq = v0; ++		update_offset_train = true; ++		break; ++	case WrDqsT: ++		ddr_data_offset_train.tx_dqs = v0; ++		update_offset_train = true; ++		break; ++	case RdV: ++		ddr_data_offset_train.vref = v0; ++		update_offset_train = true; ++		break; ++	default: ++		die("%s: Invalid margin parameter %u\n", __func__, param); ++	} ++	if (update_offset_train) { ++		update_data_offset_train( ++			ctrl, ++			param, ++			en_multicast, ++			channel, ++			rank, ++			byte, ++			update_ctrl, ++			regfile, ++			ddr_data_offset_train.raw); ++	} ++} ++ ++void change_1d_margin_multicast( ++	struct sysinfo *ctrl, ++	const enum margin_parameter param, ++	const int32_t value0, ++	const uint8_t rank, ++	const bool update_ctrl, ++	const enum regfile_mode regfile) ++{ ++	change_margin(ctrl, param, value0, true, 0, rank, 0, update_ctrl, regfile); ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index a36ebfacd1..500fc28909 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -35,6 +35,18 @@ +  + #define RTTNOM_MASK		(BIT(9) | BIT(6) | BIT(2)) +  ++/* Margin parameter limits */ ++#define MAX_POSSIBLE_TIME	31 ++#define MAX_POSSIBLE_VREF	54 ++ ++#define MAX_POSSIBLE_BOTH	MAX_POSSIBLE_VREF ++ ++#define MIN_TIME		(-MAX_POSSIBLE_TIME) ++#define MAX_TIME		(MAX_POSSIBLE_TIME) ++ ++#define MIN_VREF		(-MAX_POSSIBLE_VREF) ++#define MAX_VREF		(MAX_POSSIBLE_VREF) ++ + #define BASIC_VA_PAT_SPREAD_8	0x01010101 +  + #define WDB_CACHE_LINE_SIZE	8 +@@ -45,6 +57,14 @@ + /* Specified in PI 
ticks. 64 PI ticks == 1 qclk */ + #define tDQSCK_DRIFT		64 +  ++enum margin_parameter { ++	RcvEna, ++	RdT, ++	WrT, ++	WrDqsT, ++	RdV, ++}; ++ + /* ZQ calibration types */ + enum { + 	ZQ_INIT,	/* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init  with tZQinit  */ +@@ -516,6 +536,25 @@ void download_regfile( + 	bool read_rf_rd, + 	bool read_rf_wr); +  ++void change_margin( ++	struct sysinfo *ctrl, ++	const enum margin_parameter param, ++	const int32_t value0, ++	const bool en_multicast, ++	const uint8_t channel, ++	const uint8_t rank, ++	const uint8_t byte, ++	const bool update_ctrl, ++	const enum regfile_mode regfile); ++ ++void change_1d_margin_multicast( ++	struct sysinfo *ctrl, ++	const enum margin_parameter param, ++	const int32_t value0, ++	const uint8_t rank, ++	const bool update_ctrl, ++	const enum regfile_mode regfile); ++ + uint8_t get_rx_bias(const struct sysinfo *ctrl); +  + uint8_t get_tCWL(uint32_t mem_clock_mhz); +diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +index b099f4bb82..a0e36ed082 100644 +--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h ++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +@@ -25,6 +25,18 @@ union ddr_data_tx_train_rank_reg { + 	uint32_t raw; + }; +  ++union ddr_data_offset_train_reg { ++	struct __packed { ++		int32_t rcven  : 6; // Bits  5:0 ++		int32_t rx_dqs : 6; // Bits 11:6 ++		int32_t tx_dq  : 6; // Bits 17:12 ++		int32_t tx_dqs : 6; // Bits 23:18 ++		int32_t vref   : 7; // Bits 30:24 ++		int32_t        : 1; // Bits 31:31 ++	}; ++	uint32_t raw; ++}; ++ + union ddr_data_control_0_reg { + 	struct __packed { + 		uint32_t rx_training_mode      : 1; // Bits  0:0 +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 9172d4f2b0..0acafbc826 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h 
+@@ -21,6 +21,7 @@ + #define DDR_DATA_TRAIN_FEEDBACK(ch, byte)	_DDRIO_C_R_B(0x0054, ch, 0, byte) +  + #define DQ_CONTROL_2(ch, byte)			_DDRIO_C_R_B(0x0064, ch, 0, byte) ++#define DDR_DATA_OFFSET_TRAIN_ch_b(ch, byte)	_DDRIO_C_R_B(0x0070, ch, 0, byte) + #define DQ_CONTROL_0(ch, byte)			_DDRIO_C_R_B(0x0074, ch, 0, byte) +  + /* DDR CKE per-channel */ +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch b/config/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch new file mode 100644 index 00000000..b13eb2db --- /dev/null +++ b/config/coreboot/haswell/patches/0023-haswell-NRI-Add-read-MPR-training.patch @@ -0,0 +1,331 @@ +From 6781cec818501f7afd6ee26464fd4556ac3068cb Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sun, 8 May 2022 11:35:49 +0200 +Subject: [PATCH 23/26] haswell NRI: Add read MPR training + +Implement read training using DDR3 MPR (Multi-Purpose Register). + +Change-Id: Id17cb2c4c399ac9bcc937b595b58f863c152461b +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/raminit_main.c     |   1 + + .../haswell/native_raminit/raminit_native.h   |   4 + + .../haswell/native_raminit/train_read_mpr.c   | 240 ++++++++++++++++++ + .../intel/haswell/registers/mchbar.h          |   2 +- + 5 files changed, 247 insertions(+), 1 deletion(-) + create mode 100644 src/northbridge/intel/haswell/native_raminit/train_read_mpr.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index e2fbfb4211..c442be0728 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -16,4 +16,5 @@ romstage-y += setup_wdb.c + romstage-y += spd_bitmunching.c + romstage-y += testing_io.c + romstage-y += timings_refresh.c ++romstage-y += train_read_mpr.c + 
romstage-y += train_receive_enable.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 7d444659c3..264d1468f5 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -61,6 +61,7 @@ static const struct task_entry cold_boot[] = { + 	{ do_jedec_init,                                          true, "JEDECINIT",  }, + 	{ pre_training,                                           true, "PRETRAIN",   }, + 	{ train_receive_enable,                                   true, "RCVET",      }, ++	{ train_read_mpr,                                         true, "RDMPRT",     }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index 500fc28909..a7551ad63c 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -27,6 +27,8 @@ + /* Always use 12 legs for emphasis (not trained) */ + #define TXEQFULLDRV		(3 << 4) +  ++#define LOOPCOUNT_INFINITE	0xff ++ + /* DDR3 mode register bits */ + #define MR0_DLL_RESET		BIT(8) +  +@@ -212,6 +214,7 @@ enum raminit_status { + 	RAMINIT_STATUS_POLL_TIMEOUT, + 	RAMINIT_STATUS_REUT_ERROR, + 	RAMINIT_STATUS_RCVEN_FAILURE, ++	RAMINIT_STATUS_RMPR_FAILURE, + 	RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/ + }; +  +@@ -435,6 +438,7 @@ enum raminit_status configure_mc(struct sysinfo *ctrl); + enum raminit_status configure_memory_map(struct sysinfo *ctrl); + enum raminit_status do_jedec_init(struct sysinfo *ctrl); + enum raminit_status train_receive_enable(struct sysinfo *ctrl); ++enum raminit_status train_read_mpr(struct sysinfo *ctrl); +  + void configure_timings(struct sysinfo *ctrl); 
+ void configure_refresh(struct sysinfo *ctrl); +diff --git a/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c b/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c +new file mode 100644 +index 0000000000..0225e1a384 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/train_read_mpr.c +@@ -0,0 +1,240 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <commonlib/clamp.h> ++#include <console/console.h> ++#include <delay.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++#include "ranges.h" ++ ++#define RMPR_START	(-32) ++#define RMPR_STOP	(32) ++#define RMPR_STEP	1 ++ ++#define RMPR_MIN_WIDTH	12 ++ ++#define RMPR_PLOT	RAM_DEBUG ++ ++/* ++ * Clear rx_training_mode. For LPDDR, we first need to disable odt_samp_extend_en, ++ * then disable rx_training_mode, and finally re-enable odt_samp_extend_en. ++ */ ++static void clear_rx_training_mode(struct sysinfo *ctrl, const uint8_t channel) ++{ ++	for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++		mchbar_write32(DQ_CONTROL_2(channel, byte), ctrl->dq_control_2[channel][byte]); ++ ++	if (ctrl->lpddr) { ++		union ddr_data_control_0_reg data_control_0 = { ++			.raw = mchbar_read32(DDR_DATA_ch_CONTROL_0(channel)), ++		}; ++		data_control_0.odt_samp_extend_en = 0; ++		mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++		tick_delay(1); ++		data_control_0.rx_training_mode = 0; ++		mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++		tick_delay(1); ++	} ++	mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]); ++} ++ ++static void set_rxdqs_edges_to_midpoint(struct sysinfo *ctrl) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			for (uint8_t byte = 0; byte < 
ctrl->lanes; byte++) ++				update_rxt(ctrl, channel, rank, byte, RXT_RXDQS_BOTH, 32); ++		} ++	} ++} ++ ++static void enter_mpr_train_ddr_mode(struct sysinfo *ctrl, const uint8_t rank) ++{ ++	/* Program MR3 and mask RAS/WE to prevent scheduler from issuing non-read commands */ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!rank_in_ch(ctrl, rank, channel)) ++			continue; ++ ++		if (!ctrl->lpddr) ++			reut_issue_mrs(ctrl, channel, BIT(rank), 3, 1 << 2); ++ ++		union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = { ++			.raw = mchbar_read32(REUT_ch_MISC_ODT_CTRL(channel)), ++		}; ++		reut_misc_odt_ctrl.mpr_train_ddr_on = 1; ++		mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw); ++	} ++} ++ ++static void leave_mpr_train_ddr_mode(struct sysinfo *ctrl, const uint8_t rank) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!rank_in_ch(ctrl, rank, channel)) ++			continue; ++ ++		/* ++		 * The mpr_train_ddr_on bit will force a special command. ++		 * Therefore, clear it before issuing the MRS command. 
++		 */ ++		union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = { ++			.raw = mchbar_read32(REUT_ch_MISC_ODT_CTRL(channel)), ++		}; ++		reut_misc_odt_ctrl.mpr_train_ddr_on = 0; ++		mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw); ++		if (!ctrl->lpddr) ++			reut_issue_mrs(ctrl, channel, BIT(rank), 3, 0 << 2); ++	} ++} ++ ++enum raminit_status train_read_mpr(struct sysinfo *ctrl) ++{ ++	set_rxdqs_edges_to_midpoint(ctrl); ++	clear_data_offset_train_all(ctrl); ++	setup_io_test_mpr(ctrl, ctrl->chanmap, LOOPCOUNT_INFINITE, NSOE); ++	enum raminit_status status = RAMINIT_STATUS_SUCCESS; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (!does_rank_exist(ctrl, rank)) ++			continue; ++ ++		printk(BIOS_DEBUG, "Rank %u\n", rank); ++		printk(RMPR_PLOT, "Channel"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(RMPR_PLOT, "\t%u\t\t", channel); ++		} ++		printk(RMPR_PLOT, "\nByte"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(RMPR_PLOT, "\t"); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++				printk(RMPR_PLOT, "%u ", byte); ++		} ++		enter_mpr_train_ddr_mode(ctrl, rank); ++		struct linear_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 }; ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) ++			select_reut_ranks(ctrl, channel, BIT(rank)); ++ ++		printk(RMPR_PLOT, "\nDqsDelay\n"); ++		int8_t dqs_delay; ++		for (dqs_delay = RMPR_START; dqs_delay < RMPR_STOP; dqs_delay += RMPR_STEP) { ++			printk(RMPR_PLOT, "% 5d", dqs_delay); ++			const enum regfile_mode regfile = REG_FILE_USE_START; ++			change_1d_margin_multicast(ctrl, RdT, dqs_delay, 0, false, regfile); ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				for (uint8_t byte = 0; byte < 
ctrl->lanes; byte++) { ++					union ddr_data_control_2_reg data_control_2 = { ++						.raw = ctrl->dq_control_2[channel][byte], ++					}; ++					data_control_2.force_bias_on = 1; ++					data_control_2.force_rx_on   = 1; ++					data_control_2.leaker_comp   = 0; ++					mchbar_write32(DQ_CONTROL_2(channel, byte), ++						data_control_2.raw); ++				} ++				union ddr_data_control_0_reg data_control_0 = { ++					.raw = ctrl->dq_control_0[channel], ++				}; ++				data_control_0.rx_training_mode   = 1; ++				data_control_0.force_odt_on       = !ctrl->lpddr; ++				data_control_0.en_read_preamble   = 0; ++				data_control_0.odt_samp_extend_en = ctrl->lpddr; ++				const uint32_t reg_offset = DDR_DATA_ch_CONTROL_0(channel); ++				mchbar_write32(reg_offset, data_control_0.raw); ++			} ++			run_mpr_io_test(false); ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				printk(RMPR_PLOT, "\t"); ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					uint32_t fb = get_data_train_feedback(channel, byte); ++					const bool pass = fb == 1; ++					printk(RMPR_PLOT, pass ? ". 
" : "# "); ++					linear_record_pass( ++						®ion_data[channel][byte], ++						pass, ++						dqs_delay, ++						RMPR_START, ++						RMPR_STEP); ++				} ++			} ++			printk(RMPR_PLOT, "\n"); ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				clear_rx_training_mode(ctrl, channel); ++			} ++			io_reset(); ++		} ++		printk(RMPR_PLOT, "\n"); ++		leave_mpr_train_ddr_mode(ctrl, rank); ++		clear_data_offset_train_all(ctrl); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(BIOS_DEBUG, "C%u.R%u: \tLeft\tRight\tWidth\tCenter\tRxDqsPN\n", ++				channel, rank); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				struct linear_train_data *data = ®ion_data[channel][byte]; ++				const int32_t lwidth = range_width(data->largest); ++				if (lwidth <= RMPR_MIN_WIDTH) { ++					printk(BIOS_ERR, ++						"Bad eye (lwidth %d <= min %d) for byte %u\n", ++						lwidth, RMPR_MIN_WIDTH, byte); ++					status = RAMINIT_STATUS_RMPR_FAILURE; ++				} ++				/* ++				 * The MPR center may not be ideal on certain platforms for ++				 * unknown reasons. If so, adjust it with a magical number. ++				 * For Haswell, the magical number is zero. Hell knows why. ++				 */ ++				const int32_t center = range_center(data->largest); ++				ctrl->rxdqsp[channel][rank][byte] = center - RMPR_START; ++				ctrl->rxdqsn[channel][rank][byte] = center - RMPR_START; ++				printk(BIOS_DEBUG, "  B%u: \t%d\t%d\t%d\t%d\t%u\n", byte, ++					data->largest.start, data->largest.end, lwidth, ++					center, ctrl->rxdqsp[channel][rank][byte]); ++			} ++			printk(BIOS_DEBUG, "\n"); ++		} ++	} ++ ++	/* ++	 * Now program the DQS center values on populated ranks. data is taken from ++	 * the host struct. We need to do it after all ranks are trained, because we ++	 * need to keep the same DQS value on all ranks during the training procedure. 
++	 */ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++				update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0); ++		} ++	} ++	change_1d_margin_multicast(ctrl, RdT, 0, 0, false, REG_FILE_USE_CURRENT); ++	io_reset(); ++	return status; ++} +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 0acafbc826..6a31d3a32c 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -122,7 +122,7 @@ + #define REUT_ch_ERR_DATA_MASK(ch)		_MCMAIN_C(0x40d8, ch) +  + #define REUT_ch_MISC_CKE_CTRL(ch)		_MCMAIN_C(0x4190, ch) +- ++#define REUT_ch_MISC_ODT_CTRL(ch)		_MCMAIN_C(0x4194, ch) + #define REUT_ch_MISC_PAT_CADB_CTRL(ch)		_MCMAIN_C(0x4198, ch) + #define REUT_ch_PAT_CADB_MRS(ch)		_MCMAIN_C(0x419c, ch) + #define REUT_ch_PAT_CADB_MUX_CTRL(ch)		_MCMAIN_C(0x41a0, ch) +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch b/config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch new file mode 100644 index 00000000..59e9af9d --- /dev/null +++ b/config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch @@ -0,0 +1,688 @@ +From 20fe4fa852d3e13851a01b51dc984ec5976c864e Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sun, 8 May 2022 12:56:04 +0200 +Subject: [PATCH 24/26] haswell NRI: Add write leveling + +Implement JEDEC write leveling, which is done in two steps. The first +step uses the JEDEC procedure to do "fine" write leveling, i.e. align +the DQS phase to the clock signal. The second step performs a regular +read-write test to correct "coarse" cycle errors. 
+ +Change-Id: I27678523fe22c38173a688e2a4751c259a20f009 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/raminit_main.c     |   1 + + .../haswell/native_raminit/raminit_native.h   |  10 + + .../train_jedec_write_leveling.c              | 580 ++++++++++++++++++ + .../intel/haswell/registers/mchbar.h          |   2 + + 5 files changed, 594 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index c442be0728..40c2f5e014 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -16,5 +16,6 @@ romstage-y += setup_wdb.c + romstage-y += spd_bitmunching.c + romstage-y += testing_io.c + romstage-y += timings_refresh.c ++romstage-y += train_jedec_write_leveling.c + romstage-y += train_read_mpr.c + romstage-y += train_receive_enable.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 264d1468f5..1ff23be615 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -62,6 +62,7 @@ static const struct task_entry cold_boot[] = { + 	{ pre_training,                                           true, "PRETRAIN",   }, + 	{ train_receive_enable,                                   true, "RCVET",      }, + 	{ train_read_mpr,                                         true, "RDMPRT",     }, ++	{ train_jedec_write_leveling,                             true, "JWRL",       }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h 
b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index a7551ad63c..666b233c45 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -59,6 +59,9 @@ + /* Specified in PI ticks. 64 PI ticks == 1 qclk */ + #define tDQSCK_DRIFT		64 +  ++/* Maximum additional latency */ ++#define MAX_ADD_DELAY		2 ++ + enum margin_parameter { + 	RcvEna, + 	RdT, +@@ -215,6 +218,7 @@ enum raminit_status { + 	RAMINIT_STATUS_REUT_ERROR, + 	RAMINIT_STATUS_RCVEN_FAILURE, + 	RAMINIT_STATUS_RMPR_FAILURE, ++	RAMINIT_STATUS_JWRL_FAILURE, + 	RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/ + }; +  +@@ -380,6 +384,11 @@ static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint + 	return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte)); + } +  ++static inline uint16_t get_byte_group_errors(const uint8_t channel) ++{ ++	return mchbar_read32(4 + REUT_ch_ERR_MISC_STATUS(channel)) & 0x1ff; ++} ++ + /* Number of ticks to wait in units of 69.841279 ns (citation needed) */ + static inline void tick_delay(const uint32_t delay) + { +@@ -439,6 +448,7 @@ enum raminit_status configure_memory_map(struct sysinfo *ctrl); + enum raminit_status do_jedec_init(struct sysinfo *ctrl); + enum raminit_status train_receive_enable(struct sysinfo *ctrl); + enum raminit_status train_read_mpr(struct sysinfo *ctrl); ++enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl); +  + void configure_timings(struct sysinfo *ctrl); + void configure_refresh(struct sysinfo *ctrl); +diff --git a/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c +new file mode 100644 +index 0000000000..1ba28a3bd4 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c +@@ -0,0 +1,580 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later 
*/ ++ ++#include <assert.h> ++#include <console/console.h> ++#include <delay.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++#include "ranges.h" ++ ++#define JWLC_PLOT	RAM_DEBUG ++#define JWRL_PLOT	RAM_DEBUG ++ ++static void reset_dram_dll(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank) ++{ ++	reut_issue_mrs(ctrl, channel, BIT(rank), 0, ctrl->mr0[channel][rank] | MR0_DLL_RESET); ++} ++ ++static void program_wdb_pattern(struct sysinfo *ctrl, const bool invert) ++{ ++	/* Pattern to keep DQ-DQS simple but detect any failures. Same as NHM/WSM. */ ++	const uint8_t pat[4][2] = { ++		{ 0x00, 0xff }, ++		{ 0xff, 0x00 }, ++		{ 0xc3, 0x3c }, ++		{ 0x3c, 0xc3 }, ++	}; ++	const uint8_t pmask[2][8] = { ++		{ 0, 0, 1, 1, 1, 1, 0, 0 }, ++		{ 1, 1, 0, 0, 0, 0, 1, 1 }, ++	}; ++	for (uint8_t s = 0; s < ARRAY_SIZE(pat); s++) ++		write_wdb_fixed_pat(ctrl, pat[s], pmask[invert], ARRAY_SIZE(pmask[invert]), s); ++} ++ ++static int16_t set_add_delay(uint32_t *add_delay, uint8_t rank, int8_t target_off) ++{ ++	const uint8_t shift = rank * 2; ++	if (target_off > MAX_ADD_DELAY) { ++		*add_delay &= ~(3 << shift); ++		*add_delay |= MAX_ADD_DELAY << shift; ++		return 128 * (target_off - MAX_ADD_DELAY); ++	} else if (target_off < 0) { ++		*add_delay &= ~(3 << shift); ++		*add_delay |= 0 << shift; ++		return 128 * target_off; ++	} else { ++		*add_delay &= ~(3 << shift); ++		*add_delay |= target_off << shift; ++		return 0; ++	} ++} ++ ++static enum raminit_status train_jedec_write_leveling_cleanup(struct sysinfo *ctrl) ++{ ++	const struct reut_box reut_addr = { ++		.col = { ++			.start   = 0, ++			.stop    = 1023, ++			.inc_val = 1, ++		}, ++	}; ++	const struct wdb_pat wdb_pattern = { ++		.start_ptr  = 0, ++		.stop_ptr   = 3, ++		.inc_rate   = 1, ++		.dq_pattern = BASIC_VA, ++	}; ++	const int8_t offsets[] = { 0, 1, -1, 2, 3 }; ++	const int8_t dq_offsets[] = { 0, -10, 10, -5, 5, -15, 15 }; ++	const uint8_t dq_offset_max = 
ARRAY_SIZE(dq_offsets); ++ ++	/* Set LFSR seeds to be sequential */ ++	program_wdb_lfsr(ctrl, true); ++	setup_io_test( ++		ctrl, ++		ctrl->chanmap, ++		PAT_WR_RD, ++		2, ++		4, ++		&reut_addr, ++		NSOE, ++		&wdb_pattern, ++		0, ++		0); ++ ++	const union reut_pat_wdb_cl_mux_cfg_reg reut_wdb_cl_mux_cfg = { ++		.mux_0_control       = REUT_MUX_BTBUFFER, ++		.mux_1_control       = REUT_MUX_BTBUFFER, ++		.mux_2_control       = REUT_MUX_BTBUFFER, ++		.ecc_data_source_sel = 1, ++	}; ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_CFG(channel), reut_wdb_cl_mux_cfg.raw); ++	} ++ ++	int8_t byte_off[NUM_CHANNELS][NUM_LANES] = { 0 }; ++	uint32_t add_delay[NUM_CHANNELS] = { 0 }; ++	enum raminit_status status = RAMINIT_STATUS_SUCCESS; ++	bool invert = false; ++	const uint16_t valid_byte_mask = BIT(ctrl->lanes) - 1; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		uint8_t chanmask = 0; ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) ++			chanmask |= select_reut_ranks(ctrl, channel, BIT(rank)); ++ ++		if (!chanmask) ++			continue; ++ ++		printk(BIOS_DEBUG, "Rank %u\n", rank); ++		printk(JWLC_PLOT, "Channel"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(JWLC_PLOT, "\t\t%u\t", channel); ++		} ++		printk(JWLC_PLOT, "\nByte\t"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(JWLC_PLOT, "\t"); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++				printk(JWLC_PLOT, "%u ", byte); ++		} ++		printk(JWLC_PLOT, "\nDelay DqOffset"); ++		bool done = false; ++		int8_t byte_sum[NUM_CHANNELS] = { 0 }; ++		uint16_t byte_pass[NUM_CHANNELS] = { 0 }; ++		for (uint8_t off = 0; off < ARRAY_SIZE(offsets); off++) { ++			for (uint8_t channel = 0; channel < 
NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				const int16_t global_byte_off = ++					set_add_delay(&add_delay[channel], rank, offsets[off]); ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					update_txt(ctrl, channel, rank, byte, TXT_DQDQS_OFF, ++						global_byte_off); ++				} ++				mchbar_write32(SC_WR_ADD_DELAY_ch(channel), ++						add_delay[channel]); ++			} ++			/* Reset FIFOs and DRAM DLL (Micron workaround) */ ++			if (!ctrl->lpddr) { ++				io_reset(); ++				for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++					if (!rank_in_ch(ctrl, rank, channel)) ++						continue; ++ ++					reset_dram_dll(ctrl, channel, rank); ++				} ++				udelay(1); ++			} ++			for (uint8_t dq_offset = 0; dq_offset < dq_offset_max; dq_offset++) { ++				printk(JWLC_PLOT, "\n% 3d\t% 3d", ++					offsets[off], dq_offsets[dq_offset]); ++				change_1d_margin_multicast( ++					ctrl, ++					WrT, ++					dq_offsets[dq_offset], ++					rank, ++					false, ++					REG_FILE_USE_RANK); ++ ++				/* ++				 * Re-program the WDB pattern. Change the pattern ++				 * for the next test to avoid false pass issues. ++				 */ ++				program_wdb_pattern(ctrl, invert); ++				invert = !invert; ++				run_io_test(ctrl, chanmask, BASIC_VA, true); ++				done = true; ++				for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++					if (!rank_in_ch(ctrl, rank, channel)) ++						continue; ++ ++					printk(JWLC_PLOT, "\t"); ++					uint16_t result = get_byte_group_errors(channel); ++					result &= valid_byte_mask; ++ ++					/* Skip bytes that have failed or already passed */ ++					const uint16_t skip_me = result | byte_pass[channel]; ++					for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++						const bool pass = result & BIT(byte); ++						printk(JWLC_PLOT, pass ? "# " : ". 
"); ++						if (skip_me & BIT(byte)) ++							continue; ++ ++						byte_pass[channel] |= BIT(byte); ++						byte_off[channel][byte] = offsets[off]; ++						byte_sum[channel] += offsets[off]; ++					} ++					if (byte_pass[channel] != valid_byte_mask) ++						done = false; ++				} ++				if (done) ++					break; ++			} ++			if (done) ++				break; ++		} ++		printk(BIOS_DEBUG, "\n\n"); ++		if (!done) { ++			printk(BIOS_ERR, "JWLC: Could not find a pass for all bytes\n"); ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				printk(BIOS_ERR, "Channel %u, rank %u fail:", channel, rank); ++				const uint16_t passing_mask = byte_pass[channel]; ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					if (BIT(byte) & passing_mask) ++						continue; ++ ++					printk(BIOS_ERR, " %u", byte); ++				} ++				printk(BIOS_ERR, "\n"); ++			} ++			status = RAMINIT_STATUS_JWRL_FAILURE; ++			break; ++		} ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			/* Refine target offset to make sure it works for all bytes */ ++			int8_t target_off = DIV_ROUND_CLOSEST(byte_sum[channel], ctrl->lanes); ++			int16_t global_byte_off = 0; ++			uint8_t all_good_loops = 0; ++			bool all_good = 0; ++			while (!all_good) { ++				global_byte_off = ++					set_add_delay(&add_delay[channel], rank, target_off); ++				all_good = true; ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					int16_t local_offset; ++					local_offset = byte_off[channel][byte] - target_off; ++					local_offset = local_offset * 128 + global_byte_off; ++					const uint16_t tx_dq = ctrl->tx_dq[channel][rank][byte]; ++					if (tx_dq + local_offset >= (512 - 64)) { ++						all_good = false; ++						all_good_loops++; ++						target_off++; ++						break; ++					} ++					const uint16_t txdqs = ctrl->tx_dq[channel][rank][byte]; ++					if (txdqs + local_offset 
< 96) { ++						all_good = false; ++						all_good_loops++; ++						target_off--; ++						break; ++					} ++				} ++				/* Avoid an infinite loop */ ++				if (all_good_loops > 3) ++					break; ++			} ++			if (!all_good) { ++				printk(BIOS_ERR, "JWLC: Target offset refining failed\n"); ++				status = RAMINIT_STATUS_JWRL_FAILURE; ++				break; ++			} ++			printk(BIOS_DEBUG, "C%u.R%u:  Offset\tFinalEdge\n", channel, rank); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				int16_t local_offset; ++				local_offset = byte_off[channel][byte] - target_off; ++				local_offset = local_offset * 128 + global_byte_off; ++				ctrl->tx_dq[channel][rank][byte] += local_offset; ++				ctrl->txdqs[channel][rank][byte] += local_offset; ++				update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0); ++				printk(BIOS_DEBUG, "  B%u:   %d\t%d\n", byte, local_offset, ++					ctrl->txdqs[channel][rank][byte]); ++			} ++			mchbar_write32(SC_WR_ADD_DELAY_ch(channel), add_delay[channel]); ++			if (!ctrl->lpddr) { ++				reset_dram_dll(ctrl, channel, rank); ++				udelay(1); ++			} ++			printk(BIOS_DEBUG, "\n"); ++		} ++		printk(BIOS_DEBUG, "\n"); ++	} ++ ++	/* Restore WDB after test */ ++	write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0); ++	program_wdb_lfsr(ctrl, false); ++	mchbar_write32(DDR_DATA_OFFSET_TRAIN, 0); ++ ++	/** TODO: Do full JEDEC init instead? 
**/ ++	io_reset(); ++	return status; ++} ++ ++static enum raminit_status verify_wl_width(const int32_t lwidth) ++{ ++	if (lwidth <= 32) { ++		/* Check if width is valid */ ++		printk(BIOS_ERR, "WrLevel: Width region (%d) too small\n", lwidth); ++		return RAMINIT_STATUS_JWRL_FAILURE; ++	} ++	if (lwidth >= 96) { ++		/* Since we're calibrating a phase, a too large region is a problem */ ++		printk(BIOS_ERR, "WrLevel: Width region (%d) too large\n", lwidth); ++		return RAMINIT_STATUS_JWRL_FAILURE; ++	} ++	return 0; ++} ++ ++enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl) ++{ ++	/* ++	 * Enabling WL mode causes DQS to toggle for 1024 QCLK. ++	 * Wait for this to stop. Round up to nearest microsecond. ++	 */ ++	const bool wl_long_delay = ctrl->lpddr; ++	const uint32_t dqs_toggle_time = wl_long_delay ? 2048 : 1024; ++	const uint32_t wait_time_us = DIV_ROUND_UP(ctrl->qclkps * dqs_toggle_time, 1000 * 1000); ++ ++	const uint16_t wl_start = 192; ++	const uint16_t wl_stop  = 192 + 128; ++	const uint16_t wl_step  = 2; ++ ++	/* Do not use cached MR values */ ++	const bool save_restore_mrs = ctrl->restore_mrs; ++	ctrl->restore_mrs = 0; ++ ++	/* Propagate delay values (without a write command) */ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		/* Propagate delay values from rank 0 to prevent assertion failures in RTL */ ++		union ddr_data_control_0_reg data_control_0 = { ++			.raw = ctrl->dq_control_0[channel], ++		}; ++		data_control_0.read_rf_rd   = 0; ++		data_control_0.read_rf_wr   = 1; ++		data_control_0.read_rf_rank = 0; ++		mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++			union ddr_data_control_2_reg data_control_2 = { ++				.raw = ctrl->dq_control_2[channel][byte], ++			}; ++			data_control_2.force_bias_on = 1; ++			data_control_2.force_rx_on   = 0; ++			data_control_2.wl_long_delay = 
wl_long_delay; ++			mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw); ++		} ++	} ++ ++	if (ctrl->lpddr) ++		die("%s: Missing LPDDR support\n", __func__); ++ ++	if (!ctrl->lpddr) ++		ddr3_program_mr1(ctrl, 0, 1); ++ ++	enum raminit_status status = RAMINIT_STATUS_SUCCESS; ++	struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 }; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (!does_rank_exist(ctrl, rank)) ++			continue; ++ ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			/** TODO: Differs for LPDDR **/ ++			uint16_t mr1reg = ctrl->mr1[channel][rank]; ++			mr1reg &= ~MR1_QOFF_ENABLE; ++			mr1reg |= MR1_WL_ENABLE; ++			if (is_hsw_ult()) { ++				mr1reg &= ~RTTNOM_MASK; ++				mr1reg |= encode_ddr3_rttnom(120); ++			} else if (ctrl->dpc[channel] == 2) { ++				mr1reg &= ~RTTNOM_MASK; ++				mr1reg |= encode_ddr3_rttnom(60); ++			} ++			reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg); ++ ++			/* Assert ODT for myself */ ++			uint8_t odt_matrix = BIT(rank); ++			if (ctrl->dpc[channel] == 2) { ++				/* Assert ODT for non-target DIMM */ ++				const uint8_t other_dimm = ((rank + 2) / 2) & 1; ++				odt_matrix |= BIT(2 * other_dimm); ++			} ++ ++			union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = { ++				.raw = 0, ++			}; ++			if (ctrl->lpddr) { ++				/* Only one ODT pin for ULT */ ++				reut_misc_odt_ctrl.odt_on       = 1; ++				reut_misc_odt_ctrl.odt_override = 1; ++			} else if (!is_hsw_ult()) { ++				reut_misc_odt_ctrl.odt_on       = odt_matrix; ++				reut_misc_odt_ctrl.odt_override = 0xf; ++			} ++			mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw); ++		} ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			/* ++			 * Enable write leveling mode in DDR and propagate delay ++			 * values (without a write command). Stay in WL mode. 
++			 */ ++			union ddr_data_control_0_reg data_control_0 = { ++				.raw = ctrl->dq_control_0[channel], ++			}; ++			data_control_0.wl_training_mode = 1; ++			data_control_0.tx_pi_on         = 1; ++			data_control_0.read_rf_rd       = 0; ++			data_control_0.read_rf_wr       = 1; ++			data_control_0.read_rf_rank     = rank; ++			mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++		} ++		printk(BIOS_DEBUG, "\nRank %u\n", rank); ++		printk(JWRL_PLOT, "Channel\t"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(JWRL_PLOT, "%u", channel); ++			if (channel > 0) ++				continue; ++ ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++				printk(JWRL_PLOT, "\t"); ++		} ++		printk(JWRL_PLOT, "\nByte"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++				printk(JWRL_PLOT, "\t%u", byte); ++		} ++		printk(JWRL_PLOT, "\nWlDelay"); ++		for (uint16_t wl_delay = wl_start; wl_delay < wl_stop; wl_delay += wl_step) { ++			printk(JWRL_PLOT, "\n %3u:", wl_delay); ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					update_txt(ctrl, channel, rank, byte, TXT_TXDQS, ++						wl_delay); ++				} ++			} ++			/* Wait for the first burst to finish */ ++			if (wl_delay == wl_start) ++				udelay(wait_time_us); ++ ++			io_reset(); ++			udelay(wait_time_us); ++			for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++					const uint32_t feedback = ++						get_data_train_feedback(channel, byte); ++					const bool pass = (feedback & 0x1ff) >= 16; ++					printk(JWRL_PLOT, 
"\t%c%u", pass ? '.' : '#', feedback); ++					phase_record_pass( ++						&region_data[channel][byte], ++						pass, ++						wl_delay, ++						wl_start, ++						wl_step); ++				} ++			} ++		} ++		printk(JWRL_PLOT, "\n"); ++		printk(BIOS_DEBUG, "\n\tInitSt\tInitEn\tCurrSt\tCurrEn\tLargSt\tLargEn\n"); ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			printk(BIOS_DEBUG, "C%u\n", channel); ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				struct phase_train_data *data = &region_data[channel][byte]; ++ ++				phase_append_initial_to_current(data, wl_start, wl_step); ++				printk(BIOS_DEBUG, "   B%u:\t%d\t%d\t%d\t%d\t%d\t%d\n", ++					byte, ++					data->initial.start, ++					data->initial.end, ++					data->current.start, ++					data->current.end, ++					data->largest.start, ++					data->largest.end); ++			} ++		} ++ ++		/* ++		 * Clean up after test. Very coarsely adjust for ++		 * any cycle errors. Program values for TxDQS. 
++		 */ ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!rank_in_ch(ctrl, rank, channel)) ++				continue; ++ ++			/* Clear ODT before MRS (JEDEC spec) */ ++			mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), 0); ++ ++			/** TODO: Differs for LPDDR **/ ++			const uint16_t mr1reg = ctrl->mr1[channel][rank] | MR1_QOFF_ENABLE; ++			reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg); ++ ++			printk(BIOS_DEBUG, "\nC%u.R%u:  LftEdge Width\n", channel, rank); ++			const bool rank_x16 = ctrl->dimms[channel][rank / 2].data.width == 16; ++			for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++				struct phase_train_data *data = &region_data[channel][byte]; ++				const int32_t lwidth = range_width(data->largest); ++				int32_t tx_start = data->largest.start; ++				printk(BIOS_DEBUG, "  B%u:   %d\t%d\n", byte, tx_start, lwidth); ++				status = verify_wl_width(lwidth); ++				if (status) { ++					printk(BIOS_ERR, ++						"WrLevel problems on channel %u, byte %u\n", ++						channel, byte); ++					goto clean_up; ++				} ++ ++				/* Align byte pairs if DIMM is x16 */ ++				if (rank_x16 && (byte & 1)) { ++					const struct phase_train_data *const ref_data = ++							&region_data[channel][byte - 1]; ++ ++					if (tx_start > ref_data->largest.start + 64) ++						tx_start -= 128; ++ ++					if (tx_start < ref_data->largest.start - 64) ++						tx_start += 128; ++				} ++ ++				/* Fix for b4618067 - need to add 1 QCLK to DQS PI */ ++				if (is_hsw_ult()) ++					tx_start += 64; ++ ++				assert(tx_start >= 0); ++				ctrl->txdqs[channel][rank][byte] = tx_start; ++				ctrl->tx_dq[channel][rank][byte] = tx_start + 32; ++				update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0); ++			} ++		} ++		printk(BIOS_DEBUG, "\n"); ++	} ++ ++clean_up: ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]); ++		for (uint8_t byte = 0; byte 
< ctrl->lanes; byte++) { ++			mchbar_write32(DQ_CONTROL_2(channel, byte), ++				ctrl->dq_control_2[channel][byte]); ++		} ++	} ++	if (!ctrl->lpddr) ++		ddr3_program_mr1(ctrl, 0, 0); ++ ++	ctrl->restore_mrs = save_restore_mrs; ++ ++	if (status) ++		return status; ++ ++	/** TODO: If this step fails and dec_wrd is set, clear it and try again **/ ++	return train_jedec_write_leveling_cleanup(ctrl); ++} +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 6a31d3a32c..7c0b5a49de 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -121,6 +121,8 @@ +  + #define REUT_ch_ERR_DATA_MASK(ch)		_MCMAIN_C(0x40d8, ch) +  ++#define REUT_ch_ERR_MISC_STATUS(ch)		_MCMAIN_C(0x40e8, ch) ++ + #define REUT_ch_MISC_CKE_CTRL(ch)		_MCMAIN_C(0x4190, ch) + #define REUT_ch_MISC_ODT_CTRL(ch)		_MCMAIN_C(0x4194, ch) + #define REUT_ch_MISC_PAT_CADB_CTRL(ch)		_MCMAIN_C(0x4198, ch) +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch b/config/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch new file mode 100644 index 00000000..d15ea5d1 --- /dev/null +++ b/config/coreboot/haswell/patches/0025-haswell-NRI-Add-final-raminit-steps.patch @@ -0,0 +1,570 @@ +From d041b14f3af69db5f4598c84e3f53c9cd572ffb5 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Sun, 8 May 2022 14:29:05 +0200 +Subject: [PATCH 25/26] haswell NRI: Add final raminit steps + +Implement the remaining raminit steps. Although many training steps are +missing, this is enough to boot on the Asrock B85M Pro4. 
+ +Change-Id: I94f3b65f0218d4da4fda4d84592dfd91f77f8f21 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + src/northbridge/intel/haswell/Kconfig         |   4 +- + .../intel/haswell/native_raminit/Makefile.inc |   1 + + .../haswell/native_raminit/activate_mc.c      | 388 ++++++++++++++++++ + .../haswell/native_raminit/raminit_main.c     |   5 +- + .../haswell/native_raminit/raminit_native.c   |   5 +- + .../haswell/native_raminit/raminit_native.h   |   2 + + .../haswell/native_raminit/reg_structs.h      |  12 + + .../intel/haswell/registers/mchbar.h          |   7 + + 8 files changed, 416 insertions(+), 8 deletions(-) + create mode 100644 src/northbridge/intel/haswell/native_raminit/activate_mc.c + +diff --git a/src/northbridge/intel/haswell/Kconfig b/src/northbridge/intel/haswell/Kconfig +index b659bf6d98..61f2a3c64c 100644 +--- a/src/northbridge/intel/haswell/Kconfig ++++ b/src/northbridge/intel/haswell/Kconfig +@@ -10,12 +10,12 @@ config NORTHBRIDGE_INTEL_HASWELL + if NORTHBRIDGE_INTEL_HASWELL +  + config USE_NATIVE_RAMINIT +-	bool "[NOT WORKING] Use native raminit" ++	bool "[NOT COMPLETE] Use native raminit" + 	default n + 	select HAVE_DEBUG_RAM_SETUP + 	help + 	  Select if you want to use coreboot implementation of raminit rather than +-	  MRC.bin. Currently incomplete and does not boot. ++	  MRC.bin. Currently incomplete and does not support S3 resume. 
+  + config HASWELL_VBOOT_IN_BOOTBLOCK + 	depends on VBOOT +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index 40c2f5e014..d97da72890 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -1,5 +1,6 @@ + ## SPDX-License-Identifier: GPL-2.0-or-later +  ++romstage-y += activate_mc.c + romstage-y += change_margin.c + romstage-y += configure_mc.c + romstage-y += ddr3.c +diff --git a/src/northbridge/intel/haswell/native_raminit/activate_mc.c b/src/northbridge/intel/haswell/native_raminit/activate_mc.c +new file mode 100644 +index 0000000000..78a7ad27ef +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/activate_mc.c +@@ -0,0 +1,388 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <console/console.h> ++#include <delay.h> ++#include <device/pci_ops.h> ++#include <northbridge/intel/haswell/haswell.h> ++#include <timer.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++static void update_internal_clocks_on(struct sysinfo *ctrl) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		bool clocks_on = false; ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++			const union ddr_data_control_1_reg data_control_1 = { ++				.raw = ctrl->dq_control_1[channel][byte], ++			}; ++			const int8_t o_on = data_control_1.odt_delay; ++			const int8_t s_on = data_control_1.sense_amp_delay; ++			const int8_t o_off = data_control_1.odt_duration; ++			const int8_t s_off = data_control_1.sense_amp_duration; ++			if (o_on + o_off >= 7 || s_on + s_off >= 7) { ++				clocks_on = true; ++				break; ++			} ++		} ++		union ddr_data_control_0_reg data_control_0 = { ++			.raw = ctrl->dq_control_0[channel], ++		}; ++		data_control_0.internal_clocks_on = clocks_on; ++		ctrl->dq_control_0[channel] = 
data_control_0.raw; ++		mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++	} ++} ++ ++/* Switch off unused segments of the SDLL to save power */ ++static void update_sdll_length(struct sysinfo *ctrl) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++			uint8_t max_pi = 0; ++			for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++				if (!rank_in_ch(ctrl, rank, channel)) ++					continue; ++ ++				const uint8_t rx_dqs_p = ctrl->rxdqsp[channel][rank][byte]; ++				const uint8_t rx_dqs_n = ctrl->rxdqsn[channel][rank][byte]; ++				max_pi = MAX(max_pi, MAX(rx_dqs_p, rx_dqs_n)); ++			} ++			/* Update SDLL length for power savings */ ++			union ddr_data_control_1_reg data_control_1 = { ++				.raw = ctrl->dq_control_1[channel][byte], ++			}; ++			/* Calculate which segments to turn off */ ++			data_control_1.sdll_segment_disable = (7 - (max_pi >> 3)) & ~1; ++			ctrl->dq_control_1[channel][byte] = data_control_1.raw; ++			mchbar_write32(DQ_CONTROL_1(channel, byte), data_control_1.raw); ++		} ++	} ++} ++ ++static void set_rx_clk_stg_num(struct sysinfo *ctrl, const uint8_t channel) ++{ ++	const uint8_t rcven_drift = ctrl->lpddr ? 
DIV_ROUND_UP(tDQSCK_DRIFT, ctrl->qclkps) : 1; ++	uint8_t max_rcven = 0; ++	for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++		if (!rank_in_ch(ctrl, rank, channel)) ++			continue; ++ ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++			max_rcven = MAX(max_rcven, ctrl->rcven[channel][rank][byte] / 64); ++	} ++	const union ddr_data_control_1_reg ddr_data_control_1 = { ++		.raw = ctrl->dq_control_1[channel][0], ++	}; ++	const bool lpddr_long_odt = ddr_data_control_1.lpddr_long_odt_en; ++	const uint8_t rcven_turnoff = max_rcven + 18 + 2 * rcven_drift + lpddr_long_odt; ++	const union ddr_data_control_0_reg ddr_data_control_0 = { ++		.raw = ctrl->dq_control_0[channel], ++	}; ++	for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++		union ddr_data_control_2_reg ddr_data_control_2 = { ++			.raw = ctrl->dq_control_2[channel][byte], ++		}; ++		if (ddr_data_control_0.odt_samp_extend_en) { ++			if (ddr_data_control_2.rx_clk_stg_num < rcven_turnoff) ++				ddr_data_control_2.rx_clk_stg_num = rcven_turnoff; ++		} else { ++			const int8_t o_on = ddr_data_control_1.odt_delay; ++			const int8_t o_off = ddr_data_control_1.odt_duration; ++			ddr_data_control_2.rx_clk_stg_num = MAX(17, o_on + o_off + 14); ++		} ++		ctrl->dq_control_2[channel][byte] = ddr_data_control_2.raw; ++		mchbar_write32(DQ_CONTROL_2(channel, byte), ddr_data_control_2.raw); ++	} ++} ++ ++#define SELF_REFRESH_IDLE_COUNT 0x200 ++ ++static void enter_sr(void) ++{ ++	mchbar_write32(PM_SREF_CONFIG, SELF_REFRESH_IDLE_COUNT | BIT(16)); ++	udelay(1); ++} ++ ++enum power_down_mode { ++	PDM_NO_PD	= 0, ++	PDM_APD		= 1, ++	PDM_PPD		= 2, ++	PDM_PPD_DLL_OFF	= 6, ++}; ++ ++static void power_down_config(struct sysinfo *ctrl) ++{ ++	const enum power_down_mode pd_mode = ctrl->lpddr ? 
PDM_PPD : PDM_PPD_DLL_OFF; ++	mchbar_write32(PM_PDWN_CONFIG, pd_mode << 12 | 0x40); ++} ++ ++static void train_power_modes_post(struct sysinfo *ctrl) ++{ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		/* Adjust tCPDED and tPRPDEN */ ++		if (ctrl->mem_clock_mhz >= 933) ++			ctrl->tc_bankrank_d[channel].tCPDED = 2; ++ ++		if (ctrl->mem_clock_mhz >= 1066) ++			ctrl->tc_bankrank_d[channel].tPRPDEN = 2; ++ ++		mchbar_write32(TC_BANK_RANK_D_ch(channel), ctrl->tc_bankrank_d[channel].raw); ++	} ++	power_down_config(ctrl); ++	mchbar_write32(MCDECS_CBIT, BIT(30));	/* dis_msg_clk_gate */ ++} ++ ++static uint8_t compute_burst_end_odt_delay(const struct sysinfo *const ctrl) ++{ ++	/* Must be disabled for LPDDR */ ++	if (ctrl->lpddr) ++		return 0; ++ ++	const uint8_t beod = MIN(7, DIV_ROUND_CLOSEST(14300 * 20 / 100, ctrl->qclkps)); ++	if (beod < 3) ++		return 0; ++ ++	if (beod < 4) ++		return 4; ++ ++	return beod; ++} ++ ++static void program_burst_end_odt_delay(struct sysinfo *ctrl) ++{ ++	/* Program burst_end_odt_delay - it should be zero during training steps */ ++	const uint8_t beod = compute_burst_end_odt_delay(ctrl); ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++			union ddr_data_control_1_reg ddr_data_control_1 = { ++				.raw = ctrl->dq_control_1[channel][byte], ++			}; ++			ddr_data_control_1.burst_end_odt_delay = beod; ++			ctrl->dq_control_1[channel][byte] = ddr_data_control_1.raw; ++			mchbar_write32(DQ_CONTROL_1(channel, byte), ddr_data_control_1.raw); ++		} ++	} ++} ++ ++/* ++ * Return a random value to use for scrambler seeds. Try to use RDRAND ++ * first and fall back to hardcoded values if RDRAND does not succeed. 
++ */ ++static uint16_t get_random_number(const uint8_t channel) ++{ ++	/* The RDRAND instruction is only available 100k cycles after reset */ ++	for (size_t i = 0; i < 100000; i++) { ++		uint32_t status; ++		uint32_t random; ++		/** TODO: Clean up asm **/ ++		__asm__ __volatile__( ++			"\n\t .byte 0x0F, 0xC7, 0xF0" ++			"\n\t movl %%eax, %0" ++			"\n\t pushf" ++			"\n\t pop %%eax" ++			"\n\t movl %%eax, %1" ++			: "=m"(random), ++			  "=m"(status) ++			: /* No inputs */ ++			: "eax", "cc"); ++ ++		/* Only consider non-zero random values as valid */ ++		if (status & 1 && random) ++			return random; ++	} ++ ++	/* https://xkcd.com/221 */ ++	if (channel) ++		return 0x28f4; ++	else ++		return 0x893e; ++} ++ ++/* Work around "error: 'typeof' applied to a bit-field" */ ++static inline uint32_t max(const uint32_t a, const uint32_t b) ++{ ++	return MAX(a, b); ++} ++ ++enum raminit_status activate_mc(struct sysinfo *ctrl) ++{ ++	const bool enable_scrambling = true; ++	const bool enable_cmd_tristate = true; ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		if (!does_ch_exist(ctrl, channel)) ++			continue; ++ ++		if (enable_scrambling && ctrl->stepping < STEPPING_C0) { ++			/* Make sure tRDRD_(sr, dr, dd) are at least 6 for scrambler W/A */ ++			union tc_bank_rank_a_reg tc_bank_rank_a = { ++				.raw = mchbar_read32(TC_BANK_RANK_A_ch(channel)), ++			}; ++			tc_bank_rank_a.tRDRD_sr = max(tc_bank_rank_a.tRDRD_sr, 6); ++			tc_bank_rank_a.tRDRD_dr = max(tc_bank_rank_a.tRDRD_dr, 6); ++			tc_bank_rank_a.tRDRD_dd = max(tc_bank_rank_a.tRDRD_dd, 6); ++			mchbar_write32(TC_BANK_RANK_A_ch(channel), tc_bank_rank_a.raw); ++		} ++		if (enable_scrambling) { ++			const union ddr_scramble_reg ddr_scramble = { ++				.scram_key = get_random_number(channel), ++				.scram_en  = 1, ++			}; ++			mchbar_write32(DDR_SCRAMBLE_ch(channel), ddr_scramble.raw); ++		} ++		if (ctrl->tCMD == 1) { ++			/* If we are in 1N mode, enable and set command rate limit to 3 */ ++			union 
mcmain_command_rate_limit_reg cmd_rate_limit = { ++				.raw = mchbar_read32(COMMAND_RATE_LIMIT_ch(channel)), ++			}; ++			cmd_rate_limit.enable_cmd_limit = 1; ++			cmd_rate_limit.cmd_rate_limit   = 3; ++			mchbar_write32(COMMAND_RATE_LIMIT_ch(channel), cmd_rate_limit.raw); ++		} ++		if (enable_cmd_tristate) { ++			/* Enable command tri-state at the end of training */ ++			union tc_bank_rank_a_reg tc_bank_rank_a = { ++				.raw = mchbar_read32(TC_BANK_RANK_A_ch(channel)), ++			}; ++			tc_bank_rank_a.cmd_3st_dis = 0; ++			mchbar_write32(TC_BANK_RANK_A_ch(channel), tc_bank_rank_a.raw); ++		} ++		/* Set MC to normal mode and clean the ODT and CKE */ ++		mchbar_write32(REUT_ch_SEQ_CFG(channel), REUT_MODE_NOP << 12); ++		/* Set again the rank occupancy */ ++		mchbar_write8(MC_INIT_STATE_ch(channel), ctrl->rankmap[channel]); ++		if (ctrl->is_ecc) { ++			/* Enable ECC I/O and logic */ ++			union mad_dimm_reg mad_dimm = { ++				.raw = mchbar_read32(MAD_DIMM(channel)), ++			}; ++			mad_dimm.ecc_mode = 3; ++			mchbar_write32(MAD_DIMM(channel), mad_dimm.raw); ++		} ++	} ++ ++	if (!is_hsw_ult()) ++		update_internal_clocks_on(ctrl); ++ ++	update_sdll_length(ctrl); ++ ++	program_burst_end_odt_delay(ctrl); ++ ++	if (is_hsw_ult()) { ++		for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++			if (!does_ch_exist(ctrl, channel)) ++				continue; ++ ++			set_rx_clk_stg_num(ctrl, channel); ++		} ++		/** TODO: Program DDRPL_CR_DDR_TX_DELAY if Memory Trace is enabled **/ ++	} ++ ++	/* Enable periodic COMP */ ++	mchbar_write32(M_COMP, (union pcu_comp_reg) { ++		.comp_interval = COMP_INT, ++	}.raw); ++ ++	/* Enable the power mode before PCU starts working */ ++	train_power_modes_post(ctrl); ++ ++	/* Set idle timer and self refresh enable bits */ ++	enter_sr(); ++ ++	/** FIXME: Do not hardcode power weights and RAPL settings **/ ++	mchbar_write32(0x5888, 0x00000d0d); ++	mchbar_write32(0x5884, 0x00000004);	/* 58.2 pJ */ ++ ++	mchbar_write32(0x58e0, 0); ++	mchbar_write32(0x58e4, 0); 
++ ++	mchbar_write32(0x5890, 0xffff); ++	mchbar_write32(0x5894, 0xffff); ++	mchbar_write32(0x5898, 0xffff); ++	mchbar_write32(0x589c, 0xffff); ++	mchbar_write32(0x58d0, 0xffff); ++	mchbar_write32(0x58d4, 0xffff); ++	mchbar_write32(0x58d8, 0xffff); ++	mchbar_write32(0x58dc, 0xffff); ++ ++	/* Overwrite thermal parameters */ ++	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++		mchbar_write32(_MCMAIN_C(0x42ec, channel), 0x0000000f); ++		mchbar_write32(_MCMAIN_C(0x42f0, channel), 0x00000009); ++		mchbar_write32(_MCMAIN_C(0x42f4, channel), 0x00000093); ++		mchbar_write32(_MCMAIN_C(0x42f8, channel), 0x00000087); ++		mchbar_write32(_MCMAIN_C(0x42fc, channel), 0x000000de); ++ ++		/** TODO: Differs for LPDDR **/ ++		mchbar_write32(PM_THRT_CKE_MIN_ch(channel), 0x30); ++	} ++	mchbar_write32(PCU_DDR_PTM_CTL, 0x40); ++	return RAMINIT_STATUS_SUCCESS; ++} ++ ++static void mc_lockdown(void) ++{ ++	/* Lock memory controller registers */ ++	mchbar_write32(MC_LOCK, 0x8f); ++ ++	/* MPCOHTRK_GDXC_OCLA_ADDRESS_HI_LOCK is set when programming the memory map */ ++ ++	/* Lock memory map registers */ ++	pci_or_config16(HOST_BRIDGE, GGC,         1 <<  0); ++	pci_or_config32(HOST_BRIDGE, DPR,         1 <<  0); ++	pci_or_config32(HOST_BRIDGE, MESEG_LIMIT, 1 << 10); ++	pci_or_config32(HOST_BRIDGE, REMAPBASE,   1 <<  0); ++	pci_or_config32(HOST_BRIDGE, REMAPLIMIT,  1 <<  0); ++	pci_or_config32(HOST_BRIDGE, TOM,         1 <<  0); ++	pci_or_config32(HOST_BRIDGE, TOUUD,       1 <<  0); ++	pci_or_config32(HOST_BRIDGE, BDSM,        1 <<  0); ++	pci_or_config32(HOST_BRIDGE, BGSM,        1 <<  0); ++	pci_or_config32(HOST_BRIDGE, TOLUD,       1 <<  0); ++} ++ ++enum raminit_status raminit_done(struct sysinfo *ctrl) ++{ ++	union mc_init_state_g_reg mc_init_state_g = { ++		.raw = mchbar_read32(MC_INIT_STATE_G), ++	}; ++	mc_init_state_g.refresh_enable = 1; ++	mc_init_state_g.pu_mrc_done    = 1; ++	mc_init_state_g.mrc_done       = 1; ++	mchbar_write32(MC_INIT_STATE_G, mc_init_state_g.raw); 
++ ++	/* Lock the memory controller to enable normal operation */ ++	mc_lockdown(); ++ ++	/* Poll for mc_init_done_ack to make sure memory initialization is complete */ ++	printk(BIOS_DEBUG, "Waiting for mc_init_done acknowledgement... "); ++ ++	struct stopwatch timer; ++	stopwatch_init_msecs_expire(&timer, 2000); ++	do { ++		mc_init_state_g.raw = mchbar_read32(MC_INIT_STATE_G); ++ ++		/* DRAM will NOT work without the acknowledgement. There is no hope. */ ++		if (stopwatch_expired(&timer)) ++			die("\nTimed out waiting for mc_init_done acknowledgement\n"); ++ ++	} while (mc_init_state_g.mc_init_done_ack == 0); ++	printk(BIOS_DEBUG, "DONE!\n"); ++ ++	/* Provide some data for the graphics driver. Yes, it's hardcoded. */ ++	mchbar_write32(SSKPD + 0, 0x05a2404f); ++	mchbar_write32(SSKPD + 4, 0x140000a0); ++	return RAMINIT_STATUS_SUCCESS; ++} +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 1ff23be615..3a65fb01fb 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -63,6 +63,8 @@ static const struct task_entry cold_boot[] = { + 	{ train_receive_enable,                                   true, "RCVET",      }, + 	{ train_read_mpr,                                         true, "RDMPRT",     }, + 	{ train_jedec_write_leveling,                             true, "JWRL",       }, ++	{ activate_mc,                                            true, "ACTIVATE",   }, ++	{ raminit_done,                                           true, "RAMINITEND", }, + }; +  + /* Return a generic stepping value to make stepping checks simpler */ +@@ -143,7 +145,4 @@ void raminit_main(const enum raminit_boot_mode bootmode) +  + 	if (status != RAMINIT_STATUS_SUCCESS) + 		die("Memory initialization was met with utmost failure and misery\n"); +- +-	/** TODO: Implement the required magic **/ +-	die("NATIVE RAMINIT: More 
Magic (tm) required.\n"); + } +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.c b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +index bd9bc8e692..1ea729b23d 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.c +@@ -200,8 +200,6 @@ void perform_raminit(const int s3resume) + 		else + 			me_status = ME_INIT_STATUS_SUCCESS; +  +-		/** TODO: Remove this once raminit is implemented **/ +-		me_status = ME_INIT_STATUS_ERROR; + 		intel_early_me_init_done(me_status); + 	} +  +@@ -217,7 +215,8 @@ void perform_raminit(const int s3resume) + 	} +  + 	/* Save training data on non-S3 resumes */ +-	if (!s3resume) ++	/** TODO: Enable this once training data is populated **/ ++	if (0 && !s3resume) + 		save_mrc_data(&md); +  + 	/** TODO: setup_sdram_meminfo **/ +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index 666b233c45..98e39cb76e 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -449,6 +449,8 @@ enum raminit_status do_jedec_init(struct sysinfo *ctrl); + enum raminit_status train_receive_enable(struct sysinfo *ctrl); + enum raminit_status train_read_mpr(struct sysinfo *ctrl); + enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl); ++enum raminit_status activate_mc(struct sysinfo *ctrl); ++enum raminit_status raminit_done(struct sysinfo *ctrl); +  + void configure_timings(struct sysinfo *ctrl); + void configure_refresh(struct sysinfo *ctrl); +diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +index a0e36ed082..0d9aaa1f7c 100644 +--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h ++++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h +@@ 
-294,6 +294,18 @@ union ddr_cke_ctl_controls_reg { + 	uint32_t raw; + }; +  ++union ddr_scramble_reg { ++	struct __packed { ++		uint32_t scram_en    :  1; // Bits  0:0 ++		uint32_t scram_key   : 16; // Bits 16:1 ++		uint32_t clk_gate_ab :  2; // Bits 18:17 ++		uint32_t clk_gate_c  :  2; // Bits 20:19 ++		uint32_t en_dbi_ab   :  1; // Bits 21:21 ++		uint32_t             : 10; // Bits 31:22 ++	}; ++	uint32_t raw; ++}; ++ + union ddr_scram_misc_control_reg { + 	struct __packed { + 		uint32_t wl_wake_cycles       :  2; // Bits  1:0 +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 7c0b5a49de..49a215aa71 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -20,6 +20,7 @@ +  + #define DDR_DATA_TRAIN_FEEDBACK(ch, byte)	_DDRIO_C_R_B(0x0054, ch, 0, byte) +  ++#define DQ_CONTROL_1(ch, byte)			_DDRIO_C_R_B(0x0060, ch, 0, byte) + #define DQ_CONTROL_2(ch, byte)			_DDRIO_C_R_B(0x0064, ch, 0, byte) + #define DDR_DATA_OFFSET_TRAIN_ch_b(ch, byte)	_DDRIO_C_R_B(0x0070, ch, 0, byte) + #define DQ_CONTROL_0(ch, byte)			_DDRIO_C_R_B(0x0074, ch, 0, byte) +@@ -147,6 +148,8 @@ + #define QCLK_ch_LDAT_SDAT(ch)			_MCMAIN_C(0x42d4, ch) + #define QCLK_ch_LDAT_DATA_IN_x(ch, x)		_MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 
1 */ +  ++#define PM_THRT_CKE_MIN_ch(ch)			_MCMAIN_C(0x4328, ch) ++ + #define REUT_GLOBAL_CTL				0x4800 + #define REUT_GLOBAL_ERR				0x4804 +  +@@ -175,6 +178,8 @@ +  + #define MCSCHEDS_DFT_MISC	0x4c30 +  ++#define PM_PDWN_CONFIG		0x4cb0 ++ + #define REUT_ERR_DATA_STATUS	0x4ce0 +  + #define REUT_MISC_CKE_CTRL	0x4d90 +@@ -186,8 +191,10 @@ + #define MAD_CHNL		0x5000 /* Address Decoder Channel Configuration */ + #define MAD_DIMM(ch)		(0x5004 + (ch) * 4) + #define MAD_ZR			0x5014 ++#define MCDECS_CBIT		0x501c + #define MC_INIT_STATE_G		0x5030 + #define MRC_REVISION		0x5034 /* MRC Revision */ ++#define PM_SREF_CONFIG		0x5060 +  + #define RCOMP_TIMER		0x5084 +  +--  +2.39.2 + diff --git a/config/coreboot/haswell/patches/0026-Remove-warning-for-coreboot-images-built-without-a-p.patch b/config/coreboot/haswell/patches/0026-Remove-warning-for-coreboot-images-built-without-a-p.patch new file mode 100644 index 00000000..547c6392 --- /dev/null +++ b/config/coreboot/haswell/patches/0026-Remove-warning-for-coreboot-images-built-without-a-p.patch @@ -0,0 +1,38 @@ +From 1ce4f118b024a6367382b46016781f30fe622e3e Mon Sep 17 00:00:00 2001 +From: Nicholas Chin <nic.c3.14@gmail.com> +Date: Fri, 12 May 2023 19:55:15 -0600 +Subject: [PATCH] Remove warning for coreboot images built without a payload + +I added this in upstream to prevent people from accidentally flashing +roms without a payload resulting in a no boot situation, but in +libreboot lbmk handles the payload and thus this warning always comes +up. This has caused confusion and concern so just patch it out. 
+--- + payloads/Makefile.inc | 13 +------------ + 1 file changed, 1 insertion(+), 12 deletions(-) + +diff --git a/payloads/Makefile.inc b/payloads/Makefile.inc +index e735443a76..4f1692a873 100644 +--- a/payloads/Makefile.inc ++++ b/payloads/Makefile.inc +@@ -49,16 +49,5 @@ distclean-payloads: + print-repo-info-payloads: + 	-$(foreach payload, $(PAYLOADS_LIST), $(MAKE) -C $(payload) print-repo-info 2>/dev/null; ) +  +-ifeq ($(CONFIG_PAYLOAD_NONE),y) +-files_added:: warn_no_payload +-endif +- +-warn_no_payload: +-	printf "\n\t** WARNING **\n" +-	printf "coreboot has been built without a payload. Writing\n" +-	printf "a coreboot image without a payload to your board's\n" +-	printf "flash chip will result in a non-booting system. You\n" +-	printf "can use cbfstool to add a payload to the image.\n\n" +- + .PHONY: force-payload coreinfo nvramcui +-.PHONY: clean-payloads distclean-payloads print-repo-info-payloads warn_no_payload ++.PHONY: clean-payloads distclean-payloads print-repo-info-payloads +--  +2.40.1 + diff --git a/config/coreboot/haswell/patches/0027-coreboot-haswell-fix-acpica-downloads.patch b/config/coreboot/haswell/patches/0027-coreboot-haswell-fix-acpica-downloads.patch new file mode 100644 index 00000000..292d60e9 --- /dev/null +++ b/config/coreboot/haswell/patches/0027-coreboot-haswell-fix-acpica-downloads.patch @@ -0,0 +1,30 @@ +From 29c1116ebd5879568010a8386e4838294a78b408 Mon Sep 17 00:00:00 2001 +From: Leah Rowe <leah@libreboot.org> +Date: Sun, 16 Jul 2023 03:48:23 +0100 +Subject: [PATCH 1/1] coreboot/haswell: fix acpica downloads + +the upstream link died. i now host the relevant acpica +tarball myself, on libreboot rsync. 
this patch makes +coreboot crossgcc use that + +Signed-off-by: Leah Rowe <leah@libreboot.org> +--- + util/crossgcc/buildgcc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/util/crossgcc/buildgcc b/util/crossgcc/buildgcc +index 3c4b10cc92..0c4262b7b1 100755 +--- a/util/crossgcc/buildgcc ++++ b/util/crossgcc/buildgcc +@@ -52,7 +52,7 @@ MPFR_ARCHIVE="https://ftpmirror.gnu.org/mpfr/mpfr-${MPFR_VERSION}.tar.xz" + MPC_ARCHIVE="https://ftpmirror.gnu.org/mpc/mpc-${MPC_VERSION}.tar.gz" + GCC_ARCHIVE="https://ftpmirror.gnu.org/gcc/gcc-${GCC_VERSION}/gcc-${GCC_VERSION}.tar.xz" + BINUTILS_ARCHIVE="https://ftpmirror.gnu.org/binutils/binutils-${BINUTILS_VERSION}.tar.xz" +-IASL_ARCHIVE="https://acpica.org/sites/acpica/files/acpica-unix2-${IASL_VERSION}.tar.gz" ++IASL_ARCHIVE="https://mirror.math.princeton.edu/pub/libreboot/misc/acpica/acpica-unix2-${IASL_VERSION}.tar.gz" + # CLANG toolchain archive locations + LLVM_ARCHIVE="https://github.com/llvm/llvm-project/releases/download/llvmorg-${CLANG_VERSION}/llvm-${CLANG_VERSION}.src.tar.xz" + CLANG_ARCHIVE="https://github.com/llvm/llvm-project/releases/download/llvmorg-${CLANG_VERSION}/clang-${CLANG_VERSION}.src.tar.xz" +--  +2.40.1 + diff --git a/config/coreboot/haswell/target.cfg b/config/coreboot/haswell/target.cfg new file mode 100644 index 00000000..f96c5fc2 --- /dev/null +++ b/config/coreboot/haswell/target.cfg @@ -0,0 +1,4 @@ +tree="haswell" +romtype="normal" +rev="1411ecf6f0b2c7395bcb96b856dcfdddb1b0c81b" +arch="x86_64" | 
