summaryrefslogtreecommitdiff
path: root/config/coreboot/haswell/patches/0017-haswell-NRI-Do-sense-amplifier-offset-training.patch
diff options
context:
space:
mode:
Diffstat (limited to 'config/coreboot/haswell/patches/0017-haswell-NRI-Do-sense-amplifier-offset-training.patch')
-rw-r--r--config/coreboot/haswell/patches/0017-haswell-NRI-Do-sense-amplifier-offset-training.patch476
1 files changed, 476 insertions, 0 deletions
diff --git a/config/coreboot/haswell/patches/0017-haswell-NRI-Do-sense-amplifier-offset-training.patch b/config/coreboot/haswell/patches/0017-haswell-NRI-Do-sense-amplifier-offset-training.patch
new file mode 100644
index 00000000..846fe9a3
--- /dev/null
+++ b/config/coreboot/haswell/patches/0017-haswell-NRI-Do-sense-amplifier-offset-training.patch
@@ -0,0 +1,476 @@
+From be58501141aa97aa544b670e566cd6cf6797c18e Mon Sep 17 00:00:00 2001
+From: Angel Pons <th3fanbus@gmail.com>
+Date: Wed, 17 Apr 2024 13:20:32 +0200
+Subject: [PATCH 17/20] haswell NRI: Do sense amplifier offset training
+
+Quoting Wikipedia:
+
+ A sense amplifier is a circuit that is used to amplify and detect
+ small signals in electronic systems. It is commonly used in memory
+ circuits, such as dynamic random access memory (DRAM), to read and
+ amplify the weak signals stored in memory cells.
+
+In this case, we're calibrating the sense amplifiers in the memory
+controller. This training procedure uses a magic "sense amp offset
+cancel" mode of the DDRIO to observe the sampled logic levels, and
+sweeps Vref to find the low-high transition for each bit lane. The
+procedure consists of two stages: the first stage centers per-byte
+Vref (to ensure per-bit Vref offsets are as small as possible) and
+the second stage centers per-bit Vref.
+
+Because this procedure uses the "sense amp offset cancel" mode, it
+does not rely on DRAM being trained. It is assumed that the memory
+controller simply makes sense amp output levels observable via the
+`DDR_DATA_TRAIN_FEEDBACK` register and that the memory bus is idle
+during this training step (so the lane voltage is Vdd / 2).
+
+Note: This procedure will need to be adapted for Broadwell because
+it has per-rank per-bit RxVref registers, whereas Haswell only has
+a single per-bit RxVref register for all ranks.
+
+Change-Id: Ia07db68763f90e9701c8a376e01279ada8dbbe07
+Signed-off-by: Angel Pons <th3fanbus@gmail.com>
+---
+ .../intel/haswell/native_raminit/Makefile.mk | 1 +
+ .../haswell/native_raminit/raminit_main.c | 1 +
+ .../haswell/native_raminit/raminit_native.h | 12 +
+ .../native_raminit/train_sense_amp_offset.c | 341 ++++++++++++++++++
+ .../intel/haswell/registers/mchbar.h | 2 +
+ 5 files changed, 357 insertions(+)
+ create mode 100644 src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c
+
+diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+index 8fdd17c542..4bd668a2d6 100644
+--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+@@ -21,3 +21,4 @@ romstage-y += timings_refresh.c
+ romstage-y += train_jedec_write_leveling.c
+ romstage-y += train_read_mpr.c
+ romstage-y += train_receive_enable.c
++romstage-y += train_sense_amp_offset.c
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+index 056dde1adc..ce637e2d03 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+@@ -60,6 +60,7 @@ static const struct task_entry cold_boot[] = {
+ { configure_memory_map, true, "MEMMAP", },
+ { do_jedec_init, true, "JEDECINIT", },
+ { pre_training, true, "PRETRAIN", },
++ { train_sense_amp_offset, true, "SOT", },
+ { train_receive_enable, true, "RCVET", },
+ { train_read_mpr, true, "RDMPRT", },
+ { train_jedec_write_leveling, true, "JWRL", },
+diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+index 0750904aec..95ccd0a8b3 100644
+--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+@@ -22,6 +22,8 @@
+ #define NUM_LANES 9
+ #define NUM_LANES_NO_ECC 8
+
++#define NUM_BITS 8
++
+ #define COMP_INT 10
+
+ /* Always use 12 legs for emphasis (not trained) */
+@@ -218,6 +220,7 @@ enum raminit_status {
+ RAMINIT_STATUS_MPLL_INIT_FAILURE,
+ RAMINIT_STATUS_POLL_TIMEOUT,
+ RAMINIT_STATUS_REUT_ERROR,
++ RAMINIT_STATUS_SAMP_OFFSET_FAILURE,
+ RAMINIT_STATUS_RCVEN_FAILURE,
+ RAMINIT_STATUS_RMPR_FAILURE,
+ RAMINIT_STATUS_JWRL_FAILURE,
+@@ -243,6 +246,12 @@ struct raminit_dimm_info {
+ bool valid;
+ };
+
++struct vref_margin {
++ uint8_t low;
++ uint8_t center;
++ uint8_t high;
++};
++
+ struct sysinfo {
+ enum raminit_boot_mode bootmode;
+ enum generic_stepping stepping;
+@@ -330,6 +339,8 @@ struct sysinfo {
+ uint8_t rxdqsn[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
+ int8_t rxvref[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES];
+
++ struct vref_margin rxdqvrefpb[NUM_CHANNELS][NUM_SLOTRANKS][NUM_LANES][NUM_BITS];
++
+ uint8_t clk_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint8_t ctl_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
+ uint8_t cke_pi_code[NUM_CHANNELS][NUM_SLOTRANKS];
+@@ -452,6 +463,7 @@ enum raminit_status convert_timings(struct sysinfo *ctrl);
+ enum raminit_status configure_mc(struct sysinfo *ctrl);
+ enum raminit_status configure_memory_map(struct sysinfo *ctrl);
+ enum raminit_status do_jedec_init(struct sysinfo *ctrl);
++enum raminit_status train_sense_amp_offset(struct sysinfo *ctrl);
+ enum raminit_status train_receive_enable(struct sysinfo *ctrl);
+ enum raminit_status train_read_mpr(struct sysinfo *ctrl);
+ enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl);
+diff --git a/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c b/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c
+new file mode 100644
+index 0000000000..d4f199fefb
+--- /dev/null
++++ b/src/northbridge/intel/haswell/native_raminit/train_sense_amp_offset.c
+@@ -0,0 +1,341 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#include <assert.h>
++#include <commonlib/bsd/clamp.h>
++#include <console/console.h>
++#include <delay.h>
++#include <lib.h>
++#include <types.h>
++
++#include "raminit_native.h"
++
++#define VREF_OFFSET_PLOT RAM_DEBUG
++#define SAMP_OFFSET_PLOT RAM_DEBUG
++
++struct vref_train_data {
++ int8_t best_sum;
++ int8_t best_vref;
++ int8_t sum_bits;
++ uint8_t high_mask;
++ uint8_t low_mask;
++};
++
++static enum raminit_status train_vref_offset(struct sysinfo *ctrl)
++{
++ const int8_t vref_start = -15;
++ const int8_t vref_stop = 15;
++ const struct vref_train_data initial_vref_values = {
++ .best_sum = -NUM_LANES,
++ .best_vref = 0,
++ .high_mask = 0,
++ .low_mask = 0xff,
++ };
++ struct vref_train_data vref_data[NUM_CHANNELS][NUM_LANES];
++
++ printk(VREF_OFFSET_PLOT, "Plot of sum_bits across Vref settings\nChannel");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ printk(VREF_OFFSET_PLOT, "\t%u\t\t", channel);
++ }
++
++ printk(VREF_OFFSET_PLOT, "\nByte");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ printk(VREF_OFFSET_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ printk(VREF_OFFSET_PLOT, "%u ", byte);
++ vref_data[channel][byte] = initial_vref_values;
++ union ddr_data_control_2_reg data_control_2 = {
++ .raw = ctrl->dq_control_2[channel][byte],
++ };
++ data_control_2.force_bias_on = 1;
++ data_control_2.force_rx_on = 1;
++ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw);
++ }
++ }
++
++ /* Sweep through Vref settings and find point SampOffset of +/- 7 passes */
++ printk(VREF_OFFSET_PLOT, "\n1/2 Vref");
++ for (int8_t vref = vref_start; vref <= vref_stop; vref++) {
++ printk(VREF_OFFSET_PLOT, "\n% 3d", vref);
++
++ /*
++ * To perform this test, enable offset cancel mode and enable ODT.
++ * Check results and update variables. Ideal result is all zeroes.
++ * Clear offset cancel mode at end of test to write RX_OFFSET_VDQ.
++ */
++ change_1d_margin_multicast(ctrl, RdV, vref, 0, false, REG_FILE_USE_RANK);
++
++ /* Program settings for Vref and SampOffset = 7 (8 + 7) */
++ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0xffffffff);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ /* Propagate delay values (without a read command) */
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.read_rf_rd = 1;
++ data_control_0.read_rf_wr = 0;
++ data_control_0.read_rf_rank = 0;
++ data_control_0.force_odt_on = 1;
++ data_control_0.samp_train_mode = 1;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ udelay(1);
++ data_control_0.samp_train_mode = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const uint8_t feedback = get_data_train_feedback(channel, byte);
++ struct vref_train_data *curr_data = &vref_data[channel][byte];
++ curr_data->low_mask &= feedback;
++ curr_data->sum_bits = -popcnt(feedback);
++ }
++ }
++
++ /* Program settings for Vref and SampOffset = -7 (8 - 7) */
++ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, 0x11111111);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ /* Propagate delay values (without a read command) */
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.read_rf_rd = 1;
++ data_control_0.read_rf_wr = 0;
++ data_control_0.read_rf_rank = 0;
++ data_control_0.force_odt_on = 1;
++ data_control_0.samp_train_mode = 1;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ udelay(1);
++ data_control_0.samp_train_mode = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ printk(VREF_OFFSET_PLOT, "\t");
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const uint8_t feedback = get_data_train_feedback(channel, byte);
++ struct vref_train_data *curr_data = &vref_data[channel][byte];
++ curr_data->high_mask |= feedback;
++ curr_data->sum_bits += popcnt(feedback);
++ printk(VREF_OFFSET_PLOT, "%d ", curr_data->sum_bits);
++ if (curr_data->sum_bits > curr_data->best_sum) {
++ curr_data->best_sum = curr_data->sum_bits;
++ curr_data->best_vref = vref;
++ ctrl->rxvref[channel][0][byte] = vref;
++ } else if (curr_data->sum_bits == curr_data->best_sum) {
++ curr_data->best_vref = vref;
++ }
++ }
++ }
++ }
++ printk(BIOS_DEBUG, "\n\nHi-Lo (XOR):");
++ enum raminit_status status = RAMINIT_STATUS_SUCCESS;
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "\n C%u:", channel);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct vref_train_data *const curr_data = &vref_data[channel][byte];
++ const uint8_t bit_xor = curr_data->high_mask ^ curr_data->low_mask;
++ printk(BIOS_DEBUG, "\t0x%02x", bit_xor);
++ if (bit_xor == 0xff)
++ continue;
++
++ /* Report an error if any bit did not change */
++ status = RAMINIT_STATUS_SAMP_OFFSET_FAILURE;
++ }
++ }
++ if (status)
++ printk(BIOS_ERR, "\nUnexpected bit error in Vref offset training\n");
++
++ printk(BIOS_DEBUG, "\n\nRdVref:");
++ change_1d_margin_multicast(ctrl, RdV, 0, 0, false, REG_FILE_USE_RANK);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "\n C%u:", channel);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ struct vref_train_data *const curr_data = &vref_data[channel][byte];
++ const int8_t vref_width =
++ curr_data->best_vref - ctrl->rxvref[channel][0][byte];
++
++ /*
++ * Step size for Rx Vref in DATA_OFFSET_TRAIN is about 3.9 mV
++ * whereas Rx Vref step size in RX_TRAIN_RANK is about 7.8 mV
++ */
++ int8_t vref = ctrl->rxvref[channel][0][byte] + vref_width / 2;
++ if (vref < 0)
++ vref--;
++ else
++ vref++;
++
++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) {
++ if (!rank_in_ch(ctrl, rank, channel))
++ continue;
++
++ ctrl->rxvref[channel][rank][byte] = vref / 2;
++ update_rxt(ctrl, channel, rank, byte, RXT_RESTORE, 0);
++ }
++ printk(BIOS_DEBUG, "\t% 4d", ctrl->rxvref[channel][0][byte]);
++ }
++ }
++ printk(BIOS_DEBUG, "\n\n");
++ return status;
++}
++
++/**
++ * LPDDR has an additional bit for DQS per each byte.
++ *
++ * TODO: The DQS value must be written into Data Control 2.
++ */
++#define NUM_OFFSET_TRAIN_BITS (NUM_BITS + 1)
++
++#define PLOT_CH_SPACE " "
++
++struct samp_train_data {
++ uint8_t first_zero;
++ uint8_t last_one;
++};
++
++static void train_samp_offset(struct sysinfo *ctrl)
++{
++ const uint8_t max_train_bits = ctrl->lpddr ? NUM_OFFSET_TRAIN_BITS : NUM_BITS;
++
++ struct samp_train_data samp_data[NUM_CHANNELS][NUM_LANES][NUM_OFFSET_TRAIN_BITS] = {0};
++
++ printk(BIOS_DEBUG, "Channel ");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ printk(BIOS_DEBUG, "%u ", channel); /* Same length as PLOT_CH_SPACE */
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(BIOS_DEBUG, " %s ", ctrl->lpddr ? " " : "");
++ }
++ printk(BIOS_DEBUG, "\nByte ");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(BIOS_DEBUG, "%u %s ", byte, ctrl->lpddr ? " " : "");
++
++ printk(BIOS_DEBUG, PLOT_CH_SPACE);
++ }
++ printk(SAMP_OFFSET_PLOT, "\nBits ");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ printk(SAMP_OFFSET_PLOT, "01234567%s ", ctrl->lpddr ? "S" : "");
++
++ printk(SAMP_OFFSET_PLOT, PLOT_CH_SPACE);
++ }
++ printk(SAMP_OFFSET_PLOT, "\n SAmp\n");
++ for (uint8_t samp_offset = 1; samp_offset <= 15; samp_offset++) {
++ printk(SAMP_OFFSET_PLOT, "% 5d\t", samp_offset);
++
++ uint32_t rx_offset_vdq = 0;
++ for (uint8_t bit = 0; bit < NUM_BITS; bit++) {
++ rx_offset_vdq += samp_offset << (4 * bit);
++ }
++ mchbar_write32(DDR_DATA_RX_OFFSET_VDQ, rx_offset_vdq);
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ /* Propagate delay values (without a read command) */
++ union ddr_data_control_0_reg data_control_0 = {
++ .raw = ctrl->dq_control_0[channel],
++ };
++ data_control_0.read_rf_rd = 1;
++ data_control_0.read_rf_wr = 0;
++ data_control_0.read_rf_rank = 0;
++ data_control_0.force_odt_on = 1;
++ data_control_0.samp_train_mode = 1;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ udelay(1);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ const uint32_t feedback =
++ get_data_train_feedback(channel, byte);
++
++ for (uint8_t bit = 0; bit < max_train_bits; bit++) {
++ struct samp_train_data *const curr_data =
++ &samp_data[channel][byte][bit];
++ const bool result = feedback & BIT(bit);
++ if (result) {
++ curr_data->last_one = samp_offset;
++ } else if (curr_data->first_zero == 0) {
++ curr_data->first_zero = samp_offset;
++ }
++ printk(SAMP_OFFSET_PLOT, result ? "." : "#");
++ }
++ printk(SAMP_OFFSET_PLOT, " ");
++ }
++ printk(SAMP_OFFSET_PLOT, PLOT_CH_SPACE);
++ data_control_0.samp_train_mode = 0;
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw);
++ }
++ printk(SAMP_OFFSET_PLOT, "\n");
++ }
++ printk(BIOS_DEBUG, "\nBitSAmp ");
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) {
++ uint32_t rx_offset_vdq = 0;
++ for (uint8_t bit = 0; bit < max_train_bits; bit++) {
++ struct samp_train_data *const curr_data =
++ &samp_data[channel][byte][bit];
++
++ uint8_t vref = curr_data->first_zero + curr_data->last_one;
++ vref = clamp_u8(0, vref / 2, 15);
++ /*
++ * Check for saturation conditions to make sure
++ * we are as close as possible to Vdd/2 (750 mV).
++ */
++ if (curr_data->first_zero == 0)
++ vref = 15;
++ if (curr_data->last_one == 0)
++ vref = 0;
++
++ ctrl->rxdqvrefpb[channel][0][byte][bit].center = vref;
++ rx_offset_vdq += vref & 0xf << (4 * bit);
++ printk(BIOS_DEBUG, "%x", vref);
++ }
++ mchbar_write32(RX_OFFSET_VDQ(channel, byte), rx_offset_vdq);
++ printk(BIOS_DEBUG, " ");
++ download_regfile(ctrl, channel, 1, 0, REG_FILE_USE_RANK, 0, 1, 0);
++ }
++ printk(BIOS_DEBUG, PLOT_CH_SPACE);
++ }
++ printk(BIOS_DEBUG, "\n");
++}
++
++enum raminit_status train_sense_amp_offset(struct sysinfo *ctrl)
++{
++ printk(BIOS_DEBUG, "Stage 1: Vref offset training\n");
++ const enum raminit_status status = train_vref_offset(ctrl);
++
++ printk(BIOS_DEBUG, "Stage 2: Samp offset training\n");
++ train_samp_offset(ctrl);
++
++ /* Clean up after test */
++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
++ if (!does_ch_exist(ctrl, channel))
++ continue;
++
++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]);
++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++)
++ mchbar_write32(DQ_CONTROL_2(channel, byte),
++ ctrl->dq_control_2[channel][byte]);
++ }
++ io_reset();
++ return status;
++}
+diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
+index 49a215aa71..1a168a3fc8 100644
+--- a/src/northbridge/intel/haswell/registers/mchbar.h
++++ b/src/northbridge/intel/haswell/registers/mchbar.h
+@@ -18,6 +18,8 @@
+ #define RX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0000, ch, rank, byte)
+ #define TX_TRAIN_ch_r_b(ch, rank, byte) _DDRIO_C_R_B(0x0020, ch, rank, byte)
+
++#define RX_OFFSET_VDQ(ch, byte) _DDRIO_C_R_B(0x004c, ch, 0, byte)
++
+ #define DDR_DATA_TRAIN_FEEDBACK(ch, byte) _DDRIO_C_R_B(0x0054, ch, 0, byte)
+
+ #define DQ_CONTROL_1(ch, byte) _DDRIO_C_R_B(0x0060, ch, 0, byte)
+--
+2.39.2
+