summaryrefslogtreecommitdiff
path: root/config/coreboot/haswell/patches/0007-haswell-NRI-Add-pre-training-steps.patch
blob: 8b73df884ccec57f2def537967a0206135cdc9a0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
From 936d432822fcd9aa2f018444cdc89e48e6d257d5 Mon Sep 17 00:00:00 2001
From: Angel Pons <th3fanbus@gmail.com>
Date: Sat, 7 May 2022 23:12:18 +0200
Subject: [PATCH 07/20] haswell NRI: Add pre-training steps

Implement pre-training steps, which consist of enabling ECC I/O and
filling the WDB (Write Data Buffer, stores test patterns) through a
magic LDAT port.

Change-Id: Ie2e09e3b218c4569ed8de5c5e1b05d491032e0f1
Signed-off-by: Angel Pons <th3fanbus@gmail.com>
---
 .../intel/haswell/native_raminit/Makefile.mk  |   1 +
 .../haswell/native_raminit/raminit_main.c     |  35 ++++
 .../haswell/native_raminit/raminit_native.h   |  24 +++
 .../haswell/native_raminit/reg_structs.h      |  45 +++++
 .../intel/haswell/native_raminit/setup_wdb.c  | 159 ++++++++++++++++++
 .../intel/haswell/registers/mchbar.h          |   9 +
 6 files changed, 273 insertions(+)
 create mode 100644 src/northbridge/intel/haswell/native_raminit/setup_wdb.c

diff --git a/src/northbridge/intel/haswell/native_raminit/Makefile.mk b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
index e9212df9e6..8d7d4e4db0 100644
--- a/src/northbridge/intel/haswell/native_raminit/Makefile.mk
+++ b/src/northbridge/intel/haswell/native_raminit/Makefile.mk
@@ -10,5 +10,6 @@ romstage-y += memory_map.c
 romstage-y += raminit_main.c
 romstage-y += raminit_native.c
 romstage-y += reut.c
+romstage-y += setup_wdb.c
 romstage-y += spd_bitmunching.c
 romstage-y += timings_refresh.c
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_main.c b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
index 94b268468c..5e4674957d 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_main.c
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_main.c
@@ -3,6 +3,7 @@
 #include <assert.h>
 #include <console/console.h>
 #include <cpu/intel/haswell/haswell.h>
+#include <delay.h>
 #include <device/pci_ops.h>
 #include <northbridge/intel/haswell/chip.h>
 #include <northbridge/intel/haswell/haswell.h>
@@ -12,6 +13,39 @@
 
 #include "raminit_native.h"
 
+static enum raminit_status pre_training(struct sysinfo *ctrl)
+{
+	/* Skip on S3 resume */
+	if (ctrl->bootmode == BOOTMODE_S3)
+		return RAMINIT_STATUS_SUCCESS;
+
+	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+		for (uint8_t slot = 0; slot < NUM_SLOTS; slot++) {
+			if (!rank_in_ch(ctrl, slot + slot, channel))
+				continue;
+
+			printk(RAM_DEBUG, "C%uS%u:\n", channel, slot);
+			printk(RAM_DEBUG, "\tMR0: 0x%04x\n", ctrl->mr0[channel][slot]);
+			printk(RAM_DEBUG, "\tMR1: 0x%04x\n", ctrl->mr1[channel][slot]);
+			printk(RAM_DEBUG, "\tMR2: 0x%04x\n", ctrl->mr2[channel][slot]);
+			printk(RAM_DEBUG, "\tMR3: 0x%04x\n", ctrl->mr3[channel][slot]);
+			printk(RAM_DEBUG, "\n");
+		}
+		if (ctrl->is_ecc) {
+			union mad_dimm_reg mad_dimm = {
+				.raw = mchbar_read32(MAD_DIMM(channel)),
+			};
+			/* Enable ECC I/O */
+			mad_dimm.ecc_mode = 1;
+			mchbar_write32(MAD_DIMM(channel), mad_dimm.raw);
+			/* Wait 4 usec after enabling the ECC I/O, needed by HW */
+			udelay(4);
+		}
+	}
+	setup_wdb(ctrl);
+	return RAMINIT_STATUS_SUCCESS;
+}
+
 struct task_entry {
 	enum raminit_status (*task)(struct sysinfo *);
 	bool is_enabled;
@@ -25,6 +59,7 @@ static const struct task_entry cold_boot[] = {
 	{ configure_mc,                                           true, "CONFMC",     },
 	{ configure_memory_map,                                   true, "MEMMAP",     },
 	{ do_jedec_init,                                          true, "JEDECINIT",  },
+	{ pre_training,                                           true, "PRETRAIN",   },
 };
 
 /* Return a generic stepping value to make stepping checks simpler */
diff --git a/src/northbridge/intel/haswell/native_raminit/raminit_native.h b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
index 4bc2a4955f..1971b44b66 100644
--- a/src/northbridge/intel/haswell/native_raminit/raminit_native.h
+++ b/src/northbridge/intel/haswell/native_raminit/raminit_native.h
@@ -35,6 +35,13 @@
 
 #define RTTNOM_MASK		(BIT(9) | BIT(6) | BIT(2))
 
+#define BASIC_VA_PAT_SPREAD_8	0x01010101
+
+#define WDB_CACHE_LINE_SIZE	8
+
+#define NUM_WDB_CL_MUX_SEEDS	3
+#define NUM_CADB_MUX_SEEDS	3
+
 /* ZQ calibration types */
 enum {
 	ZQ_INIT,	/* DDR3: ZQCL with tZQinit, LPDDR3: ZQ Init  with tZQinit  */
@@ -316,6 +323,23 @@ void reut_issue_mrs_all(
 
 enum raminit_status reut_issue_zq(struct sysinfo *ctrl, uint8_t chanmask, uint8_t zq_type);
 
+void write_wdb_fixed_pat(
+	const struct sysinfo *ctrl,
+	const uint8_t patterns[],
+	const uint8_t pat_mask[],
+	uint8_t spread,
+	uint16_t start);
+
+void write_wdb_va_pat(
+	const struct sysinfo *ctrl,
+	uint32_t agg_mask,
+	uint32_t vic_mask,
+	uint8_t vic_rot,
+	uint16_t start);
+
+void program_wdb_lfsr(const struct sysinfo *ctrl, bool cleanup);
+void setup_wdb(const struct sysinfo *ctrl);
+
 uint8_t get_rx_bias(const struct sysinfo *ctrl);
 
 uint8_t get_tCWL(uint32_t mem_clock_mhz);
diff --git a/src/northbridge/intel/haswell/native_raminit/reg_structs.h b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
index 9929f617fe..7aa8d8c8b2 100644
--- a/src/northbridge/intel/haswell/native_raminit/reg_structs.h
+++ b/src/northbridge/intel/haswell/native_raminit/reg_structs.h
@@ -335,6 +335,18 @@ union mcscheds_cbit_reg {
 	uint32_t raw;
 };
 
+union reut_pat_cl_mux_lmn_reg {
+	struct __packed {
+		uint32_t l_data_select : 1; // Bits  0:0
+		uint32_t en_sweep_freq : 1; // Bits  1:1
+		uint32_t               : 6; // Bits  7:2
+		uint32_t l_counter     : 8; // Bits 15:8
+		uint32_t m_counter     : 8; // Bits 23:16
+		uint32_t n_counter     : 8; // Bits 31:24
+	};
+	uint32_t raw;
+};
+
 union reut_pat_cadb_prog_reg {
 	struct __packed {
 		uint32_t addr : 16; // Bits 15:0
@@ -439,6 +451,39 @@ union reut_misc_odt_ctrl_reg {
 	uint32_t raw;
 };
 
+union ldat_pdat_reg {
+	struct __packed {
+		uint32_t fast_addr : 12; // Bits 11:0
+		uint32_t           :  4; // Bits 15:12
+		uint32_t addr_en   :  1; // Bits 16:16
+		uint32_t seq_en    :  1; // Bits 17:17
+		uint32_t pol_0     :  1; // Bits 18:18
+		uint32_t pol_1     :  1; // Bits 19:19
+		uint32_t cmd_a     :  4; // Bits 23:20
+		uint32_t cmd_b     :  4; // Bits 27:24
+		uint32_t cmd_c     :  4; // Bits 31:28
+	};
+	uint32_t raw;
+};
+
+union ldat_sdat_reg {
+	struct __packed {
+		uint32_t bank_sel   : 4; // Bits  3:0
+		uint32_t            : 1; // Bits  4:4
+		uint32_t array_sel  : 5; // Bits  9:5
+		uint32_t cmp        : 1; // Bits 10:10
+		uint32_t replicate  : 1; // Bits 11:11
+		uint32_t dword      : 4; // Bits 15:12
+		uint32_t mode       : 2; // Bits 17:16
+		uint32_t mpmap      : 6; // Bits 23:18
+		uint32_t mpb_offset : 4; // Bits 27:24
+		uint32_t stage_en   : 1; // Bits 28:28
+		uint32_t shadow     : 2; // Bits 30:29
+		uint32_t            : 1; // Bits 31:31
+	};
+	uint32_t raw;
+};
+
 union mcscheds_dft_misc_reg {
 	struct __packed {
 		uint32_t wdar                 :  1; // Bits  0:0
diff --git a/src/northbridge/intel/haswell/native_raminit/setup_wdb.c b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
new file mode 100644
index 0000000000..ec37c48415
--- /dev/null
+++ b/src/northbridge/intel/haswell/native_raminit/setup_wdb.c
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <northbridge/intel/haswell/haswell.h>
+#include <types.h>
+
+#include "raminit_native.h"
+
+static void ldat_write_cacheline(
+	const struct sysinfo *const ctrl,
+	const uint8_t chunk,
+	const uint16_t start,
+	const uint64_t data)
+{
+	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+		if (!does_ch_exist(ctrl, channel))
+			continue;
+
+		/*
+		 * Do not do a 64-bit write here. The register is not aligned
+		 * to a 64-bit boundary, which could potentially cause issues.
+		 */
+		mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 0), data & UINT32_MAX);
+		mchbar_write32(QCLK_ch_LDAT_DATA_IN_x(channel, 1), data >> 32);
+		/*
+		 * Set REPLICATE = 0 as you don't want to replicate the data.
+		 * Set BANK_SEL to the chunk you want to write the 64 bits to.
+		 * Set ARRAY_SEL = 0 (the MC WDB) and MODE = 1.
+		 */
+		const union ldat_sdat_reg ldat_sdat = {
+			.bank_sel = chunk,
+			.mode     = 1,
+		};
+		mchbar_write32(QCLK_ch_LDAT_SDAT(channel), ldat_sdat.raw);
+		/*
+		 * Finally, write the PDAT register indicating which cacheline
+		 * of the WDB you want to write to by setting FAST_ADDR field
+		 * to one of the 64 cache lines. Also set CMD_B in the PDAT
+		 * register to 4'b1000, indicating that this is a LDAT write.
+		 */
+		const union ldat_pdat_reg ldat_pdat = {
+			.fast_addr = MIN(start, 0xfff),
+			.cmd_b     = 8,
+		};
+		mchbar_write32(QCLK_ch_LDAT_PDAT(channel), ldat_pdat.raw);
+	}
+}
+
+static void clear_ldat_mode(const struct sysinfo *const ctrl)
+{
+	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++)
+		mchbar_write32(QCLK_ch_LDAT_SDAT(channel), 0);
+}
+
+void write_wdb_fixed_pat(
+	const struct sysinfo *const ctrl,
+	const uint8_t patterns[],
+	const uint8_t pat_mask[],
+	const uint8_t spread,
+	const uint16_t start)
+{
+	for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
+		uint64_t data = 0;
+		for (uint8_t b = 0; b < 64; b++) {
+			const uint8_t beff  = b % spread;
+			const uint8_t burst = patterns[pat_mask[beff]];
+			if (burst & BIT(chunk))
+				data |= 1ULL << b;
+		}
+		ldat_write_cacheline(ctrl, chunk, start, data);
+	}
+	clear_ldat_mode(ctrl);
+}
+
+static inline uint32_t rol_u32(const uint32_t val)
+{
+	return (val << 1) | ((val >> 31) & 1);
+}
+
+void write_wdb_va_pat(
+	const struct sysinfo *const ctrl,
+	const uint32_t agg_mask,
+	const uint32_t vic_mask,
+	const uint8_t vic_rot,
+	const uint16_t start)
+{
+	static const uint8_t va_mask_to_compressed[4] = {0xaa, 0xc0, 0xcc, 0xf0};
+	uint32_t v_mask = vic_mask;
+	uint32_t a_mask = agg_mask;
+	for (uint8_t v = 0; v < vic_rot; v++) {
+		uint8_t compressed[32] = {0};
+		/* Iterate through all 32 bits and create a compressed version of cacheline */
+		for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++) {
+			const uint8_t vic = !!(v_mask & BIT(b));
+			const uint8_t agg = !!(a_mask & BIT(b));
+			const uint8_t index = !vic << 1 | agg << 0;
+			compressed[b] = va_mask_to_compressed[index];
+		}
+		for (uint8_t chunk = 0; chunk < WDB_CACHE_LINE_SIZE; chunk++) {
+			uint32_t data = 0;
+			for (uint8_t b = 0; b < ARRAY_SIZE(compressed); b++)
+				data |= !!(compressed[b] & BIT(chunk)) << b;
+
+			const uint64_t data64 = (uint64_t)data << 32 | data;
+			ldat_write_cacheline(ctrl, chunk, start + v, data64);
+		}
+		v_mask = rol_u32(v_mask);
+		a_mask = rol_u32(a_mask);
+	}
+	clear_ldat_mode(ctrl);
+}
+
+void program_wdb_lfsr(const struct sysinfo *ctrl, const bool cleanup)
+{
+	/* Cleanup LFSR seeds are sequential */
+	const uint32_t cleanup_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xaaaaaa, 0xcccccc, 0xf0f0f0 };
+	const uint32_t regular_seeds[NUM_WDB_CL_MUX_SEEDS] = { 0xa10ca1, 0xef0d08, 0xad0a1e };
+	const uint32_t *seeds = cleanup ? cleanup_seeds : regular_seeds;
+
+	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+		if (!does_ch_exist(ctrl, channel))
+			continue;
+
+		for (uint8_t i = 0; i < NUM_WDB_CL_MUX_SEEDS; i++) {
+			mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_RD_x(channel, i), seeds[i]);
+			mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_WR_x(channel, i), seeds[i]);
+		}
+	}
+}
+
+void setup_wdb(const struct sysinfo *ctrl)
+{
+	const uint32_t amask[9] = {
+		0x86186186, 0x18618618, 0x30c30c30,
+		0xa28a28a2, 0x8a28a28a, 0x14514514,
+		0x28a28a28, 0x92492492, 0x24924924,
+	};
+	const uint32_t vmask = 0x41041041;
+
+	/* Fill first 8 entries with simple 2-LFSR VA pattern */
+	write_wdb_va_pat(ctrl, 0, BASIC_VA_PAT_SPREAD_8, 8, 0);
+
+	/* Fill next 54 entries with 3-LFSR VA pattern */
+	for (uint8_t a = 0; a < ARRAY_SIZE(amask); a++)
+		write_wdb_va_pat(ctrl, amask[a], vmask, 6, 8 + a * 6);
+
+	program_wdb_lfsr(ctrl, false);
+	for (uint8_t channel = 0; channel < NUM_CHANNELS; channel++) {
+		if (!does_ch_exist(ctrl, channel))
+			continue;
+
+		const union reut_pat_cl_mux_lmn_reg wdb_cl_mux_lmn = {
+			.en_sweep_freq = 1,
+			.l_counter     = 1,
+			.m_counter     = 1,
+			.n_counter     = 10,
+		};
+		mchbar_write32(REUT_ch_PAT_WDB_CL_MUX_LMN(channel), wdb_cl_mux_lmn.raw);
+	}
+}
diff --git a/src/northbridge/intel/haswell/registers/mchbar.h b/src/northbridge/intel/haswell/registers/mchbar.h
index 4fc78a7f43..f8408e51a0 100644
--- a/src/northbridge/intel/haswell/registers/mchbar.h
+++ b/src/northbridge/intel/haswell/registers/mchbar.h
@@ -94,6 +94,11 @@
 #define TC_BANK_RANK_D_ch(ch)			_MCMAIN_C(0x4014, ch)
 #define SC_ROUNDT_LAT_ch(ch)			_MCMAIN_C(0x4024, ch)
 
+#define REUT_ch_PAT_WDB_CL_MUX_WR_x(ch, x)	_MCMAIN_C_X(0x4048, ch, x) /* x in 0 .. 2 */
+#define REUT_ch_PAT_WDB_CL_MUX_RD_x(ch, x)	_MCMAIN_C_X(0x4054, ch, x) /* x in 0 .. 2 */
+
+#define REUT_ch_PAT_WDB_CL_MUX_LMN(ch)		_MCMAIN_C(0x4078, ch)
+
 #define SC_WR_ADD_DELAY_ch(ch)			_MCMAIN_C(0x40d0, ch)
 
 #define REUT_ch_MISC_CKE_CTRL(ch)		_MCMAIN_C(0x4190, ch)
@@ -110,6 +115,10 @@
 #define MC_INIT_STATE_ch(ch)			_MCMAIN_C(0x42a0, ch)
 #define TC_SRFTP_ch(ch)				_MCMAIN_C(0x42a4, ch)
 
+#define QCLK_ch_LDAT_PDAT(ch)			_MCMAIN_C(0x42d0, ch)
+#define QCLK_ch_LDAT_SDAT(ch)			_MCMAIN_C(0x42d4, ch)
+#define QCLK_ch_LDAT_DATA_IN_x(ch, x)		_MCMAIN_C_X(0x42dc, ch, x) /* x in 0 .. 1 */
+
 #define REUT_GLOBAL_ERR				0x4804
 
 #define REUT_ch_SEQ_CFG(ch)			(0x48a8 + 8 * (ch))
-- 
2.39.2