From da3c9bb3c5c3b1f2e6e67a3695ce39b17bf68d5b Mon Sep 17 00:00:00 2001 From: Leah Rowe Date: Mon, 4 Sep 2023 02:36:41 +0100 Subject: merge config/ and resources/ Signed-off-by: Leah Rowe --- .../0024-haswell-NRI-Add-write-leveling.patch | 688 +++++++++++++++++++++ 1 file changed, 688 insertions(+) create mode 100644 config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch (limited to 'config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch') diff --git a/config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch b/config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch new file mode 100644 index 00000000..59e9af9d --- /dev/null +++ b/config/coreboot/haswell/patches/0024-haswell-NRI-Add-write-leveling.patch @@ -0,0 +1,688 @@ +From 20fe4fa852d3e13851a01b51dc984ec5976c864e Mon Sep 17 00:00:00 2001 +From: Angel Pons +Date: Sun, 8 May 2022 12:56:04 +0200 +Subject: [PATCH 24/26] haswell NRI: Add write leveling + +Implement JEDEC write leveling, which is done in two steps. The first +step uses the JEDEC procedure to do "fine" write leveling, i.e. align +the DQS phase to the clock signal. The second step performs a regular +read-write test to correct "coarse" cycle errors. + +Change-Id: I27678523fe22c38173a688e2a4751c259a20f009 +Signed-off-by: Angel Pons +--- + .../intel/haswell/native_raminit/Makefile.inc | 1 + + .../haswell/native_raminit/raminit_main.c | 1 + + .../haswell/native_raminit/raminit_native.h | 10 + + .../train_jedec_write_leveling.c | 580 ++++++++++++++++++ + .../intel/haswell/registers/mchbar.h | 2 + + 5 files changed, 594 insertions(+) + create mode 100644 src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c + +diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.inc b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +index c442be0728..40c2f5e014 100644 +--- a/src/northbridge/intel/haswell/native_raminit/Makefile.inc ++++ b/src/northbridge/intel/haswell/native_raminit/Makefile.inc +@@ -16,5 +16,6 @@ romstage-y += setup_wdb.c + romstage-y += spd_bitmunching.c + romstage-y += testing_io.c + romstage-y += timings_refresh.c ++romstage-y += train_jedec_write_leveling.c + romstage-y += train_read_mpr.c + romstage-y += train_receive_enable.c +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +index 264d1468f5..1ff23be615 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c +@@ -62,6 +62,7 @@ static const struct task_entry cold_boot[] = { + { pre_training, true, "PRETRAIN", }, + { train_receive_enable, true, "RCVET", }, + { train_read_mpr, true, "RDMPRT", }, ++ { train_jedec_write_leveling, true, "JWRL", }, + }; + + /* Return a generic stepping value to make stepping checks simpler */ +diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +index a7551ad63c..666b233c45 100644 +--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h ++++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h +@@ -59,6 +59,9 @@ + /* Specified in PI ticks. 64 PI ticks == 1 qclk */ + #define tDQSCK_DRIFT 64 + ++/* Maximum additional latency */ ++#define MAX_ADD_DELAY 2 ++ + enum margin_parameter { + RcvEna, + RdT, +@@ -215,6 +218,7 @@ enum raminit_status { + RAMINIT_STATUS_REUT_ERROR, + RAMINIT_STATUS_RCVEN_FAILURE, + RAMINIT_STATUS_RMPR_FAILURE, ++ RAMINIT_STATUS_JWRL_FAILURE, + RAMINIT_STATUS_UNSPECIFIED_ERROR, /** TODO: Deprecated in favor of specific values **/ + }; + +@@ -380,6 +384,11 @@ static inline uint32_t get_data_train_feedback(const uint8_t channel, const uint + return mchbar_read32(DDR_DATA_TRAIN_FEEDBACK(channel, byte)); + } + ++static inline uint16_t get_byte_group_errors(const uint8_t channel) ++{ ++ return mchbar_read32(4 + REUT_ch_ERR_MISC_STATUS(channel)) & 0x1ff; ++} ++ + /* Number of ticks to wait in units of 69.841279 ns (citation needed) */ + static inline void tick_delay(const uint32_t delay) + { +@@ -439,6 +448,7 @@ enum raminit_status configure_memory_map(struct sysinfo *ctrl); + enum raminit_status do_jedec_init(struct sysinfo *ctrl); + enum raminit_status train_receive_enable(struct sysinfo *ctrl); + enum raminit_status train_read_mpr(struct sysinfo *ctrl); ++enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl); + + void configure_timings(struct sysinfo *ctrl); + void configure_refresh(struct sysinfo *ctrl); +diff --git a/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c +new file mode 100644 +index 0000000000..1ba28a3bd4 +--- /dev/null ++++ b/src/northbridge/intel/haswell/native_raminit/train_jedec_write_leveling.c +@@ -0,0 +1,580 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "raminit_native.h" ++#include "ranges.h" ++ ++#define JWLC_PLOT RAM_DEBUG ++#define JWRL_PLOT RAM_DEBUG ++ ++static void reset_dram_dll(struct sysinfo *ctrl, const uint8_t channel, const uint8_t rank) ++{ ++ reut_issue_mrs(ctrl, channel, BIT(rank), 0, ctrl->mr0[channel][rank] | MR0_DLL_RESET); ++} ++ ++static void program_wdb_pattern(struct sysinfo *ctrl, const bool invert) ++{ ++ /* Pattern to keep DQ-DQS simple but detect any failures. Same as NHM/WSM. */ ++ const uint8_t pat[4][2] = { ++ { 0x00, 0xff }, ++ { 0xff, 0x00 }, ++ { 0xc3, 0x3c }, ++ { 0x3c, 0xc3 }, ++ }; ++ const uint8_t pmask[2][8] = { ++ { 0, 0, 1, 1, 1, 1, 0, 0 }, ++ { 1, 1, 0, 0, 0, 0, 1, 1 }, ++ }; ++ for (uint8_t s = 0; s < ARRAY_SIZE(pat); s++) ++ write_wdb_fixed_pat(ctrl, pat[s], pmask[invert], ARRAY_SIZE(pmask[invert]), s); ++} ++ ++static int16_t set_add_delay(uint32_t *add_delay, uint8_t rank, int8_t target_off) ++{ ++ const uint8_t shift = rank * 2; ++ if (target_off > MAX_ADD_DELAY) { ++ *add_delay &= ~(3 << shift); ++ *add_delay |= MAX_ADD_DELAY << shift; ++ return 128 * (target_off - MAX_ADD_DELAY); ++ } else if (target_off < 0) { ++ *add_delay &= ~(3 << shift); ++ *add_delay |= 0 << shift; ++ return 128 * target_off; ++ } else { ++ *add_delay &= ~(3 << shift); ++ *add_delay |= target_off << shift; ++ return 0; ++ } ++} ++ ++static enum raminit_status train_jedec_write_leveling_cleanup(struct sysinfo *ctrl) ++{ ++ const struct reut_box reut_addr = { ++ .col = { ++ .start = 0, ++ .stop = 1023, ++ .inc_val = 1, ++ }, ++ }; ++ const struct wdb_pat wdb_pattern = { ++ .start_ptr = 0, ++ .stop_ptr = 3, ++ .inc_rate = 1, ++ .dq_pattern = BASIC_VA, ++ }; ++ const int8_t offsets[] = { 0, 1, -1, 2, 3 }; ++ const int8_t dq_offsets[] = { 0, -10, 10, -5, 5, -15, 15 }; ++ const uint8_t dq_offset_max = ARRAY_SIZE(dq_offsets); ++ ++ /* Set LFSR seeds to be sequential */ ++ program_wdb_lfsr(ctrl, true); ++ setup_io_test( ++ ctrl, ++ ctrl->chanmap, ++ PAT_WR_RD, ++ 2, ++ 4, ++ &reut_addr, ++ NSOE, ++ &wdb_pattern, ++ 0, ++ 0); ++ ++ const union reut_pat_wdb_cl_mux_cfg_reg reut_wdb_cl_mux_cfg = { ++ .mux_0_control = REUT_MUX_BTBUFFER, ++ .mux_1_control = REUT_MUX_BTBUFFER, ++ .mux_2_control = REUT_MUX_BTBUFFER, ++ .ecc_data_source_sel = 1, ++ }; ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_CFG(channel), reut_wdb_cl_mux_cfg.raw); ++ } ++ ++ int8_t byte_off[NUM_CHANNELS][NUM_LANES] = { 0 }; ++ uint32_t add_delay[NUM_CHANNELS] = { 0 }; ++ enum raminit_status status = RAMINIT_STATUS_SUCCESS; ++ bool invert = false; ++ const uint16_t valid_byte_mask = BIT(ctrl->lanes) - 1; ++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++ uint8_t chanmask = 0; ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) ++ chanmask |= select_reut_ranks(ctrl, channel, BIT(rank)); ++ ++ if (!chanmask) ++ continue; ++ ++ printk(BIOS_DEBUG, "Rank %u\n", rank); ++ printk(JWLC_PLOT, "Channel"); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ printk(JWLC_PLOT, "\t\t%u\t", channel); ++ } ++ printk(JWLC_PLOT, "\nByte\t"); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ printk(JWLC_PLOT, "\t"); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++ printk(JWLC_PLOT, "%u ", byte); ++ } ++ printk(JWLC_PLOT, "\nDelay DqOffset"); ++ bool done = false; ++ int8_t byte_sum[NUM_CHANNELS] = { 0 }; ++ uint16_t byte_pass[NUM_CHANNELS] = { 0 }; ++ for (uint8_t off = 0; off < ARRAY_SIZE(offsets); off++) { ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ const int16_t global_byte_off = ++ set_add_delay(&add_delay[channel], rank, offsets[off]); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ update_txt(ctrl, channel, rank, byte, TXT_DQDQS_OFF, ++ global_byte_off); ++ } ++ mchbar_write32(SC_WR_ADD_DELAY_ch(channel), ++ add_delay[channel]); ++ } ++ /* Reset FIFOs and DRAM DLL (Micron workaround) */ ++ if (!ctrl->lpddr) { ++ io_reset(); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ reset_dram_dll(ctrl, channel, rank); ++ } ++ udelay(1); ++ } ++ for (uint8_t dq_offset = 0; dq_offset < dq_offset_max; dq_offset++) { ++ printk(JWLC_PLOT, "\n% 3d\t% 3d", ++ offsets[off], dq_offsets[dq_offset]); ++ change_1d_margin_multicast( ++ ctrl, ++ WrT, ++ dq_offsets[dq_offset], ++ rank, ++ false, ++ REG_FILE_USE_RANK); ++ ++ /* ++ * Re-program the WDB pattern. Change the pattern ++ * for the next test to avoid false pass issues. ++ */ ++ program_wdb_pattern(ctrl, invert); ++ invert = !invert; ++ run_io_test(ctrl, chanmask, BASIC_VA, true); ++ done = true; ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ printk(JWLC_PLOT, "\t"); ++ uint16_t result = get_byte_group_errors(channel); ++ result &= valid_byte_mask; ++ ++ /* Skip bytes that have failed or already passed */ ++ const uint16_t skip_me = result | byte_pass[channel]; ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ const bool pass = result & BIT(byte); ++ printk(JWLC_PLOT, pass ? "# " : ". "); ++ if (skip_me & BIT(byte)) ++ continue; ++ ++ byte_pass[channel] |= BIT(byte); ++ byte_off[channel][byte] = offsets[off]; ++ byte_sum[channel] += offsets[off]; ++ } ++ if (byte_pass[channel] != valid_byte_mask) ++ done = false; ++ } ++ if (done) ++ break; ++ } ++ if (done) ++ break; ++ } ++ printk(BIOS_DEBUG, "\n\n"); ++ if (!done) { ++ printk(BIOS_ERR, "JWLC: Could not find a pass for all bytes\n"); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ printk(BIOS_ERR, "Channel %u, rank %u fail:", channel, rank); ++ const uint16_t passing_mask = byte_pass[channel]; ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ if (BIT(byte) & passing_mask) ++ continue; ++ ++ printk(BIOS_ERR, " %u", byte); ++ } ++ printk(BIOS_ERR, "\n"); ++ } ++ status = RAMINIT_STATUS_JWRL_FAILURE; ++ break; ++ } ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ /* Refine target offset to make sure it works for all bytes */ ++ int8_t target_off = DIV_ROUND_CLOSEST(byte_sum[channel], ctrl->lanes); ++ int16_t global_byte_off = 0; ++ uint8_t all_good_loops = 0; ++ bool all_good = 0; ++ while (!all_good) { ++ global_byte_off = ++ set_add_delay(&add_delay[channel], rank, target_off); ++ all_good = true; ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ int16_t local_offset; ++ local_offset = byte_off[channel][byte] - target_off; ++ local_offset = local_offset * 128 + global_byte_off; ++ const uint16_t tx_dq = ctrl->tx_dq[channel][rank][byte]; ++ if (tx_dq + local_offset >= (512 - 64)) { ++ all_good = false; ++ all_good_loops++; ++ target_off++; ++ break; ++ } ++ const uint16_t txdqs = ctrl->tx_dq[channel][rank][byte]; ++ if (txdqs + local_offset < 96) { ++ all_good = false; ++ all_good_loops++; ++ target_off--; ++ break; ++ } ++ } ++ /* Avoid an infinite loop */ ++ if (all_good_loops > 3) ++ break; ++ } ++ if (!all_good) { ++ printk(BIOS_ERR, "JWLC: Target offset refining failed\n"); ++ status = RAMINIT_STATUS_JWRL_FAILURE; ++ break; ++ } ++ printk(BIOS_DEBUG, "C%u.R%u: Offset\tFinalEdge\n", channel, rank); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ int16_t local_offset; ++ local_offset = byte_off[channel][byte] - target_off; ++ local_offset = local_offset * 128 + global_byte_off; ++ ctrl->tx_dq[channel][rank][byte] += local_offset; ++ ctrl->txdqs[channel][rank][byte] += local_offset; ++ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0); ++ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, local_offset, ++ ctrl->txdqs[channel][rank][byte]); ++ } ++ mchbar_write32(SC_WR_ADD_DELAY_ch(channel), add_delay[channel]); ++ if (!ctrl->lpddr) { ++ reset_dram_dll(ctrl, channel, rank); ++ udelay(1); ++ } ++ printk(BIOS_DEBUG, "\n"); ++ } ++ printk(BIOS_DEBUG, "\n"); ++ } ++ ++ /* Restore WDB after test */ ++ write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0); ++ program_wdb_lfsr(ctrl, false); ++ mchbar_write32(DDR_DATA_OFFSET_TRAIN, 0); ++ ++ /** TODO: Do full JEDEC init instead? **/ ++ io_reset(); ++ return status; ++} ++ ++static enum raminit_status verify_wl_width(const int32_t lwidth) ++{ ++ if (lwidth <= 32) { ++ /* Check if width is valid */ ++ printk(BIOS_ERR, "WrLevel: Width region (%d) too small\n", lwidth); ++ return RAMINIT_STATUS_JWRL_FAILURE; ++ } ++ if (lwidth >= 96) { ++ /* Since we're calibrating a phase, a too large region is a problem */ ++ printk(BIOS_ERR, "WrLevel: Width region (%d) too large\n", lwidth); ++ return RAMINIT_STATUS_JWRL_FAILURE; ++ } ++ return 0; ++} ++ ++enum raminit_status train_jedec_write_leveling(struct sysinfo *ctrl) ++{ ++ /* ++ * Enabling WL mode causes DQS to toggle for 1024 QCLK. ++ * Wait for this to stop. Round up to nearest microsecond. ++ */ ++ const bool wl_long_delay = ctrl->lpddr; ++ const uint32_t dqs_toggle_time = wl_long_delay ? 2048 : 1024; ++ const uint32_t wait_time_us = DIV_ROUND_UP(ctrl->qclkps * dqs_toggle_time, 1000 * 1000); ++ ++ const uint16_t wl_start = 192; ++ const uint16_t wl_stop = 192 + 128; ++ const uint16_t wl_step = 2; ++ ++ /* Do not use cached MR values */ ++ const bool save_restore_mrs = ctrl->restore_mrs; ++ ctrl->restore_mrs = 0; ++ ++ /* Propagate delay values (without a write command) */ ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ /* Propagate delay values from rank 0 to prevent assertion failures in RTL */ ++ union ddr_data_control_0_reg data_control_0 = { ++ .raw = ctrl->dq_control_0[channel], ++ }; ++ data_control_0.read_rf_rd = 0; ++ data_control_0.read_rf_wr = 1; ++ data_control_0.read_rf_rank = 0; ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ union ddr_data_control_2_reg data_control_2 = { ++ .raw = ctrl->dq_control_2[channel][byte], ++ }; ++ data_control_2.force_bias_on = 1; ++ data_control_2.force_rx_on = 0; ++ data_control_2.wl_long_delay = wl_long_delay; ++ mchbar_write32(DQ_CONTROL_2(channel, byte), data_control_2.raw); ++ } ++ } ++ ++ if (ctrl->lpddr) ++ die("%s: Missing LPDDR support\n", __func__); ++ ++ if (!ctrl->lpddr) ++ ddr3_program_mr1(ctrl, 0, 1); ++ ++ enum raminit_status status = RAMINIT_STATUS_SUCCESS; ++ struct phase_train_data region_data[NUM_CHANNELS][NUM_LANES] = { 0 }; ++ for (uint8_t rank = 0; rank < NUM_SLOTRANKS; rank++) { ++ if (!does_rank_exist(ctrl, rank)) ++ continue; ++ ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ /** TODO: Differs for LPDDR **/ ++ uint16_t mr1reg = ctrl->mr1[channel][rank]; ++ mr1reg &= ~MR1_QOFF_ENABLE; ++ mr1reg |= MR1_WL_ENABLE; ++ if (is_hsw_ult()) { ++ mr1reg &= ~RTTNOM_MASK; ++ mr1reg |= encode_ddr3_rttnom(120); ++ } else if (ctrl->dpc[channel] == 2) { ++ mr1reg &= ~RTTNOM_MASK; ++ mr1reg |= encode_ddr3_rttnom(60); ++ } ++ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg); ++ ++ /* Assert ODT for myself */ ++ uint8_t odt_matrix = BIT(rank); ++ if (ctrl->dpc[channel] == 2) { ++ /* Assert ODT for non-target DIMM */ ++ const uint8_t other_dimm = ((rank + 2) / 2) & 1; ++ odt_matrix |= BIT(2 * other_dimm); ++ } ++ ++ union reut_misc_odt_ctrl_reg reut_misc_odt_ctrl = { ++ .raw = 0, ++ }; ++ if (ctrl->lpddr) { ++ /* Only one ODT pin for ULT */ ++ reut_misc_odt_ctrl.odt_on = 1; ++ reut_misc_odt_ctrl.odt_override = 1; ++ } else if (!is_hsw_ult()) { ++ reut_misc_odt_ctrl.odt_on = odt_matrix; ++ reut_misc_odt_ctrl.odt_override = 0xf; ++ } ++ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), reut_misc_odt_ctrl.raw); ++ } ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ /* ++ * Enable write leveling mode in DDR and propagate delay ++ * values (without a write command). Stay in WL mode. ++ */ ++ union ddr_data_control_0_reg data_control_0 = { ++ .raw = ctrl->dq_control_0[channel], ++ }; ++ data_control_0.wl_training_mode = 1; ++ data_control_0.tx_pi_on = 1; ++ data_control_0.read_rf_rd = 0; ++ data_control_0.read_rf_wr = 1; ++ data_control_0.read_rf_rank = rank; ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), data_control_0.raw); ++ } ++ printk(BIOS_DEBUG, "\nRank %u\n", rank); ++ printk(JWRL_PLOT, "Channel\t"); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ printk(JWRL_PLOT, "%u", channel); ++ if (channel > 0) ++ continue; ++ ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++ printk(JWRL_PLOT, "\t"); ++ } ++ printk(JWRL_PLOT, "\nByte"); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) ++ printk(JWRL_PLOT, "\t%u", byte); ++ } ++ printk(JWRL_PLOT, "\nWlDelay"); ++ for (uint16_t wl_delay = wl_start; wl_delay < wl_stop; wl_delay += wl_step) { ++ printk(JWRL_PLOT, "\n %3u:", wl_delay); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ update_txt(ctrl, channel, rank, byte, TXT_TXDQS, ++ wl_delay); ++ } ++ } ++ /* Wait for the first burst to finish */ ++ if (wl_delay == wl_start) ++ udelay(wait_time_us); ++ ++ io_reset(); ++ udelay(wait_time_us); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ const uint32_t feedback = ++ get_data_train_feedback(channel, byte); ++ const bool pass = (feedback & 0x1ff) >= 16; ++ printk(JWRL_PLOT, "\t%c%u", pass ? '.' : '#', feedback); ++ phase_record_pass( ++ ®ion_data[channel][byte], ++ pass, ++ wl_delay, ++ wl_start, ++ wl_step); ++ } ++ } ++ } ++ printk(JWRL_PLOT, "\n"); ++ printk(BIOS_DEBUG, "\n\tInitSt\tInitEn\tCurrSt\tCurrEn\tLargSt\tLargEn\n"); ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ printk(BIOS_DEBUG, "C%u\n", channel); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ struct phase_train_data *data = ®ion_data[channel][byte]; ++ ++ phase_append_initial_to_current(data, wl_start, wl_step); ++ printk(BIOS_DEBUG, " B%u:\t%d\t%d\t%d\t%d\t%d\t%d\n", ++ byte, ++ data->initial.start, ++ data->initial.end, ++ data->current.start, ++ data->current.end, ++ data->largest.start, ++ data->largest.end); ++ } ++ } ++ ++ /* ++ * Clean up after test. Very coarsely adjust for ++ * any cycle errors. Program values for TxDQS. ++ */ ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!rank_in_ch(ctrl, rank, channel)) ++ continue; ++ ++ /* Clear ODT before MRS (JEDEC spec) */ ++ mchbar_write32(REUT_ch_MISC_ODT_CTRL(channel), 0); ++ ++ /** TODO: Differs for LPDDR **/ ++ const uint16_t mr1reg = ctrl->mr1[channel][rank] | MR1_QOFF_ENABLE; ++ reut_issue_mrs(ctrl, channel, BIT(rank), 1, mr1reg); ++ ++ printk(BIOS_DEBUG, "\nC%u.R%u: LftEdge Width\n", channel, rank); ++ const bool rank_x16 = ctrl->dimms[channel][rank / 2].data.width == 16; ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ struct phase_train_data *data = ®ion_data[channel][byte]; ++ const int32_t lwidth = range_width(data->largest); ++ int32_t tx_start = data->largest.start; ++ printk(BIOS_DEBUG, " B%u: %d\t%d\n", byte, tx_start, lwidth); ++ status = verify_wl_width(lwidth); ++ if (status) { ++ printk(BIOS_ERR, ++ "WrLevel problems on channel %u, byte %u\n", ++ channel, byte); ++ goto clean_up; ++ } ++ ++ /* Align byte pairs if DIMM is x16 */ ++ if (rank_x16 && (byte & 1)) { ++ const struct phase_train_data *const ref_data = ++ ®ion_data[channel][byte - 1]; ++ ++ if (tx_start > ref_data->largest.start + 64) ++ tx_start -= 128; ++ ++ if (tx_start < ref_data->largest.start - 64) ++ tx_start += 128; ++ } ++ ++ /* Fix for b4618067 - need to add 1 QCLK to DQS PI */ ++ if (is_hsw_ult()) ++ tx_start += 64; ++ ++ assert(tx_start >= 0); ++ ctrl->txdqs[channel][rank][byte] = tx_start; ++ ctrl->tx_dq[channel][rank][byte] = tx_start + 32; ++ update_txt(ctrl, channel, rank, byte, TXT_RESTORE, 0); ++ } ++ } ++ printk(BIOS_DEBUG, "\n"); ++ } ++ ++clean_up: ++ for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) { ++ if (!does_ch_exist(ctrl, channel)) ++ continue; ++ ++ mchbar_write32(DDR_DATA_ch_CONTROL_0(channel), ctrl->dq_control_0[channel]); ++ for (uint8_t byte = 0; byte < ctrl->lanes; byte++) { ++ mchbar_write32(DQ_CONTROL_2(channel, byte), ++ ctrl->dq_control_2[channel][byte]); ++ } ++ } ++ if (!ctrl->lpddr) ++ ddr3_program_mr1(ctrl, 0, 0); ++ ++ ctrl->restore_mrs = save_restore_mrs; ++ ++ if (status) ++ return status; ++ ++ /** TODO: If this step fails and dec_wrd is set, clear it and try again **/ ++ return train_jedec_write_leveling_cleanup(ctrl); ++} +diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h +index 6a31d3a32c..7c0b5a49de 100644 +--- a/src/northbridge/intel/haswell/registers/mchbar.h ++++ b/src/northbridge/intel/haswell/registers/mchbar.h +@@ -121,6 +121,8 @@ + + #define REUT_ch_ERR_DATA_MASK(ch) _MCMAIN_C(0x40d8, ch) + ++#define REUT_ch_ERR_MISC_STATUS(ch) _MCMAIN_C(0x40e8, ch) ++ + #define REUT_ch_MISC_CKE_CTRL(ch) _MCMAIN_C(0x4190, ch) + #define REUT_ch_MISC_ODT_CTRL(ch) _MCMAIN_C(0x4194, ch) + #define REUT_ch_MISC_PAT_CADB_CTRL(ch) _MCMAIN_C(0x4198, ch) +-- +2.39.2 + -- cgit v1.2.1