diff options
Diffstat (limited to 'config/coreboot/default/patches/0059-haswell-NRI-Do-sense-amplifier-offset-training.patch')
-rw-r--r-- | config/coreboot/default/patches/0059-haswell-NRI-Do-sense-amplifier-offset-training.patch | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/config/coreboot/default/patches/0059-haswell-NRI-Do-sense-amplifier-offset-training.patch b/config/coreboot/default/patches/0059-haswell-NRI-Do-sense-amplifier-offset-training.patch new file mode 100644 index 00000000..c51560c7 --- /dev/null +++ b/config/coreboot/default/patches/0059-haswell-NRI-Do-sense-amplifier-offset-training.patch @@ -0,0 +1,476 @@ +From 8528c7aa2a3cfcf0fe494a515a2e531ff0f1dab8 Mon Sep 17 00:00:00 2001 +From: Angel Pons <th3fanbus@gmail.com> +Date: Wed, 17 Apr 2024 13:20:32 +0200 +Subject: [PATCH 17/17] haswell NRI: Do sense amplifier offset training + +Quoting Wikipedia: + + A sense amplifier is a circuit that is used to amplify and detect + small signals in electronic systems. It is commonly used in memory + circuits, such as dynamic random access memory (DRAM), to read and + amplify the weak signals stored in memory cells. + +In this case, we're calibrating the sense amplifiers in the memory +controller. This training procedure uses a magic "sense amp offset +cancel" mode of the DDRIO to observe the sampled logic levels, and +sweeps Vref to find the low-high transition for each bit lane. The +procedure consists of two stages: the first stage centers per-byte +Vref (to ensure per-bit Vref offsets are as small as possible) and +the second stage centers per-bit Vref. + +Because this procedure uses the "sense amp offset cancel" mode, it +does not rely on DRAM being trained. It is assumed that the memory +controller simply makes sense amp output levels observable via the +`DDR_DATA_TRAIN_FEEDBACK` register and that the memory bus is idle +during this training step (so the lane voltage is Vdd / 2). + +Note: This procedure will need to be adapted for Broadwell because +it has per-rank per-bit RxVref registers, whereas Haswell only has +a single per-bit RxVref register for all ranks. + +Change-Id: Ia07db68763f90e9701c8a376e01279ada8dbbe07 +Signed-off-by: Angel Pons <th3fanbus@gmail.com> +--- + .../intel/haswell/native_raminit/Makefile.mk | 1 + + .../haswell/native_raminit/raminit_main.c | 1 + + .../haswell/native_raminit/raminit_native.h | 12 + + .../native_raminit/train_sense_amp_offset.c | 341 ++++++++++++++++++ + .../intel/haswell/registers/mchbar.h | 2 + + 5 files changed, 357 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk +index 8fdd17c542..4bd668a2d6 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk +@@ -21,3 +21,4 @@ romstage-y += timings_refresh.c + romstage-y += train_jedec_write_leveling.c + romstage-y += train_read_mpr.c + romstage-y += train_receive_enable.c ++romstage-y += train_sense_amp_offset.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 056dde1adc..ce637e2d03 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -60,6 +60,7 @@ static const struct task_entry cold_boot[] = { + { configure_memory_map, true, "MEMMAP", }, + { do_jedec_init, true, "JEDECINIT", }, + { pre_training, true, "PRETRAIN", }, ++ { train_sense_amp_offset, true, "SOT", }, + { train_receive_enable, true, "RCVET", }, + { train_read_mpr, true, "RDMPRT", }, + { train_jedec_write_leveling, true, "JWRL", }, +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index 2ac16eaad3..07eea98831 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -23,6 +23,8 @@ + #define NUM_LANES 9 + #define NUM_LANES_NO_ECC 8 + ++#define NUM_BITS 8 ++ + #define COMP_INT 10 + + /* Always use 12 legs for emphasis (not trained) */ +@@ -219,6 +221,7 @@ enum raminit_status { + RAMINIT_STATUS_MPLL_INIT_FAILURE, + RAMINIT_STATUS_POLL_TIMEOUT, + RAMINIT_STATUS_REUT_ERROR, ++ RAMINIT_STATUS_SAMP_OFFSET_FAILURE, + RAMINIT_STATUS_RCVEN_FAILURE, + RAMINIT_STATUS_RMPR_FAILURE, + RAMINIT_STATUS_JWRL_FAILURE, +@@ -244,6 +247,12 @@ struct raminit_dimm_info { + bool valid; + }; + ++struct vref_margin { ++ uint8_t low; ++ uint8_t center; ++ uint8_t high; ++}; ++ + struct sysinfo { + enum raminit_boot_mode bootmode; + enum generic_stepping stepping; +@@ -331,6 +340,8 @@ struct sysinfo { + uint8_t rxdqsn[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; + int8_t rxvref[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES]; + ++ struct vref_margin rxdqvrefpb[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES][NUM_BITS]; ++ + uint8_t clk_pi_code[NUM_CHANNELS][NUM_SLOTRANKS]; + uint8_t ctl_pi_code[NUM_CHANNELS][NUM_SLOTRANKS]; + uint8_t cke_pi_code[NUM_CHANNELS][NUM_SLOTRANKS]; +@@ -453,6 +464,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl); + enum raminit_status configure_mc(struct sysinfo *ctrl); + enum raminit_status configure_memory_map(struct sysinfo *ctrl); + enum raminit_status do_jedec_init(struct sysinfo *ctrl); ++enum raminit_status train_sense_amp_offset(struct sysinfo *ctrl); + enum raminit_status train_receive_enable(struct sysinfo *ctrl); + enum raminit_status train_read_mpr(struct sysinfo *ctrl); + enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl); +diff --git a/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c b/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c +new file mode 100644 +index 0000000000..d4f199fefb +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c +@@ -0,0 +1,341 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include <assert.h> ++#include <commonlib/bsd/clamp.h> ++#include <console/console.h> ++#include <delay.h> ++#include <lib.h> ++#include <types.h> ++ ++#include "raminit_native.h" ++ ++#define VREF_OFFSET_PLOT RAM_DEBUG ++#define SAMP_OFFSET_PLOT RAM_DEBUG ++ ++struct vref_train_data { ++ int8_t best_sum; ++ int8_t best_vref; ++ int8_t sum_bits; ++ uint8_t high_mask; ++ uint8_t low_mask; ++}; ++ ++static enum raminit_status train_vref_offset(struct sysinfo *ctrl) ++{ ++ const int8_t vref_start = -15; ++ const int8_t vref_stop = 15; ++ const struct vref_train_data initial_vref_values = { ++ .best_sum = -NUM_LANES, ++ .best_vref = 0, ++ .high_mask = 0, ++ .low_mask = 0xff, ++ }; ++ struct vref_train_data vref_data[NUM_CHANNELS][NUM_LANES]; ++ ++ printk(VREF_OFFSET_PLOT, "Plot of sum_bits across Vref settings\nChannel"); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ printk(VREF_OFFSET_PLOT, "\t%u\t\t", channel); ++ } ++ ++ printk(VREF_OFFSET_PLOT, "\nByte"); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ printk(VREF_OFFSET_PLOT, "\t"); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ printk(VREF_OFFSET_PLOT, "%u ", byte); ++ vref_data[channel][byte] = initial_vref_values; ++ union ddr_data_control_2_reg data_control_2 = { ++ .raw = ctrl->dq_control_2[channel][byte], ++ }; ++ data_control_2.force_bias_on = 1; ++ data_control_2.force_rx_on = 1; ++ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw); ++ } ++ } ++ ++ /* Sweep through Vref settings and find point SampOffset of +/- 7 passes */ ++ printk(VREF_OFFSET_PLOT, "\n1/2 Vref"); ++ for (int8_t vref = vref_start; vref <= vref_stop; vref++) { ++ printk(VREF_OFFSET_PLOT, "\n% 3d", vref); ++ ++ /* ++ * To perform this test, enable offset cancel mode and enable ODT. ++ * Check results and update variables. Ideal result is all zeroes. ++ * Clear offset cancel mode at end of test to write RX_OFFSET_VDQ. ++ */ ++ change_1d_margin_multicast(ctrl, RdV, vref, 0, false, REG_FILE_USE_RANK); ++ ++ /* Program settings for Vref and SampOffset = 7 (8 + 7) */ ++ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0xffffffff); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ /* Propagate delay values (without a read command) */ ++ union ddr_data_control_0_reg data_control_0 = { ++ .raw = ctrl->dq_control_0[channel], ++ }; ++ data_control_0.read_rf_rd = 1; ++ data_control_0.read_rf_wr = 0; ++ data_control_0.read_rf_rank = 0; ++ data_control_0.force_odt_on = 1; ++ data_control_0.samp_train_mode = 1; ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++ udelay(1); ++ data_control_0.samp_train_mode = 0; ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ const uint8_t feedback = get_data_train_feedback(channel, byte); ++ struct vref_train_data *curr_data = &vref_data[channel][byte]; ++ curr_data->low_mask &= feedback; ++ curr_data->sum_bits = -popcnt(feedback); ++ } ++ } ++ ++ /* Program settings for Vref and SampOffset = -7 (8 - 7) */ ++ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0x11111111); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ /* Propagate delay values (without a read command) */ ++ union ddr_data_control_0_reg data_control_0 = { ++ .raw = ctrl->dq_control_0[channel], ++ }; ++ data_control_0.read_rf_rd = 1; ++ data_control_0.read_rf_wr = 0; ++ data_control_0.read_rf_rank = 0; ++ data_control_0.force_odt_on = 1; ++ data_control_0.samp_train_mode = 1; ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++ udelay(1); ++ data_control_0.samp_train_mode = 0; ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++ printk(VREF_OFFSET_PLOT, "\t"); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ const uint8_t feedback = get_data_train_feedback(channel, byte); ++ struct vref_train_data *curr_data = &vref_data[channel][byte]; ++ curr_data->high_mask |= feedback; ++ curr_data->sum_bits += popcnt(feedback); ++ printk(VREF_OFFSET_PLOT, "%d ", curr_data->sum_bits); ++ if (curr_data->sum_bits > curr_data->best_sum) { ++ curr_data->best_sum = curr_data->sum_bits; ++ curr_data->best_vref = vref; ++ ctrl->rxvref[channel][0][byte] = vref; ++ } else if (curr_data->sum_bits == curr_data->best_sum) { ++ curr_data->best_vref = vref; ++ } ++ } ++ } ++ } ++ printk(BIOS_DEBUG, "\n\nHi-Lo (XOR):"); ++ enum raminit_status status = RAMINIT_STATUS_SUCCESS; ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ printk(BIOS_DEBUG, "\n C%u:", channel); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ struct vref_train_data *const curr_data = &vref_data[channel][byte]; ++ const uint8_t bit_xor = curr_data->high_mask ^ curr_data->low_mask; ++ printk(BIOS_DEBUG, "\t0x%02x", bit_xor); ++ if (bit_xor == 0xff) ++ continue; ++ ++ /* Report an error if any bit did not change */ ++ status = RAMINIT_STATUS_SAMP_OFFSET_FAILURE; ++ } ++ } ++ if (status) ++ printk(BIOS_ERR, "\nUnexpected bit error in Vref offset training\n"); ++ ++ printk(BIOS_DEBUG, "\n\nRdVref:"); ++ change_1d_margin_multicast(ctrl, RdV, 0, 0, false, REG_FILE_USE_RANK); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ printk(BIOS_DEBUG, "\n C%u:", channel); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ struct vref_train_data *const curr_data = &vref_data[channel][byte]; ++ const int8_t vref_width = ++ curr_data->best_vref - ctrl->rxvref[channel][0][byte]; ++ ++ /* ++ * Step size for Rx Vref in DATA_OFFSET_TRAIN is about 3.9 mV ++ * whereas Rx Vref step size in RX_TRAIN_RANK is about 7.8 mV ++ */ ++ int8_t vref = ctrl->rxvref[channel][0][byte] + vref_width / 2; ++ if (vref < 0) ++ vref--; ++ else ++ vref++; ++ ++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ ctrl->rxvref[channel][rank][byte] = vref / 2; ++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0); ++ } ++ printk(BIOS_DEBUG, "\t% 4d", ctrl->rxvref[channel][0][byte]); ++ } ++ } ++ printk(BIOS_DEBUG, "\n\n"); ++ return status; ++} ++ ++/** ++ * LPDDR has an additional bit for DQS per each byte. ++ * ++ * TODO: The DQS value must be written into Data Control 2. ++ */ ++#define NUM_OFFSET_TRAIN_BITS (NUM_BITS + 1) ++ ++#define PLOT_CH_SPACE " " ++ ++struct samp_train_data { ++ uint8_t first_zero; ++ uint8_t last_one; ++}; ++ ++static void train_samp_offset(struct sysinfo *ctrl) ++{ ++ const uint8_t max_train_bits = ctrl->lpddr ? NUM_OFFSET_TRAIN_BITS : NUM_BITS; ++ ++ struct samp_train_data samp_data[NUM_CHANNELS][NUM_LANES][NUM_OFFSET_TRAIN_BITS] = {0}; ++ ++ printk(BIOS_DEBUG, "Channel "); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ printk(BIOS_DEBUG, "%u ", channel); /* Same length as PLOT_CH_SPACE */ ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++ printk(BIOS_DEBUG, " %s ", ctrl->lpddr ? " " : ""); ++ } ++ printk(BIOS_DEBUG, "\nByte "); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++ printk(BIOS_DEBUG, "%u %s ", byte, ctrl->lpddr ? " " : ""); ++ ++ printk(BIOS_DEBUG, PLOT_CH_SPACE); ++ } ++ printk(SAMP_OFFSET_PLOT, "\nBits "); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++ printk(SAMP_OFFSET_PLOT, "01234567%s ", ctrl->lpddr ? "S" : ""); ++ ++ printk(SAMP_OFFSET_PLOT, PLOT_CH_SPACE); ++ } ++ printk(SAMP_OFFSET_PLOT, "\n SAmp\n"); ++ for (uint8_t samp_offset = 1; samp_offset <= 15; samp_offset++) { ++ printk(SAMP_OFFSET_PLOT, "% 5d\t", samp_offset); ++ ++ uint32_t rx_offset_vdq = 0; ++ for (uint8_t bit = 0; bit < NUM_BITS; bit++) { ++ rx_offset_vdq += samp_offset << (4 * bit); ++ } ++ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, rx_offset_vdq); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ /* Propagate delay values (without a read command) */ ++ union ddr_data_control_0_reg data_control_0 = { ++ .raw = ctrl->dq_control_0[channel], ++ }; ++ data_control_0.read_rf_rd = 1; ++ data_control_0.read_rf_wr = 0; ++ data_control_0.read_rf_rank = 0; ++ data_control_0.force_odt_on = 1; ++ data_control_0.samp_train_mode = 1; ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++ udelay(1); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ const uint32_t feedback = ++ get_data_train_feedback(channel, byte); ++ ++ for (uint8_t bit = 0; bit < max_train_bits; bit++) { ++ struct samp_train_data *const curr_data = ++ &samp_data[channel][byte][bit]; ++ const bool result = feedback & BIT(bit); ++ if (result) { ++ curr_data->last_one = samp_offset; ++ } else if (curr_data->first_zero == 0) { ++ curr_data->first_zero = samp_offset; ++ } ++ printk(SAMP_OFFSET_PLOT, result ? "." : "#"); ++ } ++ printk(SAMP_OFFSET_PLOT, " "); ++ } ++ printk(SAMP_OFFSET_PLOT, PLOT_CH_SPACE); ++ data_control_0.samp_train_mode = 0; ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++ } ++ printk(SAMP_OFFSET_PLOT, "\n"); ++ } ++ printk(BIOS_DEBUG, "\nBitSAmp "); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ uint32_t rx_offset_vdq = 0; ++ for (uint8_t bit = 0; bit < max_train_bits; bit++) { ++ struct samp_train_data *const curr_data = ++ &samp_data[channel][byte][bit]; ++ ++ uint8_t vref = curr_data->first_zero + curr_data->last_one; ++ vref = clamp_u8(0, vref / 2, 15); ++ /* ++ * Check for saturation conditions to make sure ++ * we are as close as possible to Vdd/2 (750 mV). ++ */ ++ if (curr_data->first_zero == 0) ++ vref = 15; ++ if (curr_data->last_one == 0) ++ vref = 0; ++ ++ ctrl->rxdqvrefpb[channel][0][byte][bit].center = vref; ++ rx_offset_vdq += vref & 0xf << (4 * bit); ++ printk(BIOS_DEBUG, "%x", vref); ++ } ++ mchbar_write32(RX_OFFSET_VDQ(channel, byte), rx_offset_vdq); ++ printk(BIOS_DEBUG, " "); ++ download_regfile(ctrl, channel, 1, 0, REG_FILE_USE_RANK, 0, 1, 0); ++ } ++ printk(BIOS_DEBUG, PLOT_CH_SPACE); ++ } ++ printk(BIOS_DEBUG, "\n"); ++} ++ ++enum raminit_status train_sense_amp_offset(struct sysinfo *ctrl) ++{ ++ printk(BIOS_DEBUG, "Stage 1: Vref offset training\n"); ++ const enum raminit_status status = train_vref_offset(ctrl); ++ ++ printk(BIOS_DEBUG, "Stage 2: Samp offset training\n"); ++ train_samp_offset(ctrl); ++ ++ /* Clean up after test */ ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++ mchbar_write32(DQ_CONTROL_2(channel, byte), ++ ctrl->dq_control_2[channel][byte]); ++ } ++ io_reset(); ++ return status; ++} +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 49a215aa71..1a168a3fc8 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -18,6 +18,8 @@ + #define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte) + #define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte) + ++#define RX_OFFSET_VDQ(ch, byte) _DDRIO_C_R_B(0x004c, ch, 0, byte) ++ + #define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte) + + #define DQ_CONTROL_1(ch, byte) _DDRIO_C_R_B(0x0060, ch, 0, byte) +-- +2.39.2 + |