1 files changed, 234 insertions, 60 deletions
diff --git a/util/spkmodem_recv/spkmodem-recv.c b/util/spkmodem_recv/spkmodem-recv.c
index d448c84b..07937387 100644
--- a/util/spkmodem_recv/spkmodem-recv.c
+++ b/util/spkmodem_recv/spkmodem-recv.c
@@ -1,11 +1,47 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* SPDX-FileCopyrightText: 2013 Free Software Foundation, Inc. */
-/* Usage: parec --channels=1 --rate=48000 --format=s16le | ./spkmodem-recv */
-
-/* Forked from coreboot's version, at util/spkmodem_recv/ in coreboot.git,
- * revision 5c2b5fcf2f9c9259938fd03cfa3ea06b36a007f0 as of 3 January 2022.
- * This version is heavily modified, re-written based on OpenBSD Kernel Source
- * File Style Guide (KNF); this change is Copyright 2023,2026 Leah Rowe. */
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (c) 2013 Free Software Foundation, Inc.
+ * Copyright (c) 2023, 2026 Leah Rowe <leah@libreboot.org>
+ *
+ * This program receives text encoded as pulses on the PC speaker,
+ * and decodes them via simple FSK (Frequency Shift Keying)
+ * demodulation and FIR (Finite Impulse Response) frequency
+ * discriminator.
+ *
+ * It waits for specific tones at specific intervals.
+ * It detects tones within the audio stream and reconstructs
+ * characters bit-by-bit as the encoded modem signal is received.
+ * This is performance-efficient on most CPUs, and has relatively
+ * high tolerance for noisy signals (similar to techniques used
+ * for data stored on audio cassette tapes).
+ *
+ * This is a special interface provided by coreboot and GNU GRUB,
+ * for computers that lack serial ports (useful for debugging).
+ * Note that GRUB and coreboot can both send these signals; this
+ * tool merely decodes them. This tool does not *encode*, only
+ * decode.
+ *
+ * Usage example (NOTE: little endian!):
+ * parec --channels=1 --rate=48000 --format=s16le | ./spkmodem-recv
+ *
+ * Originally provided by GNU GRUB, this version is a heavily
+ * modified fork that complies with the OpenBSD Kernel Source
+ * File Style Guide (KNF) instead of GNU coding standards; it
+ * emphasises strict error handling, portability and code
+ * quality, as characterised by OpenBSD projects. Several magic
+ * numbers have been tidied up, calculated (not hardcoded) and
+ * thoroughly explained, unlike in the original version.
+ *
+ * The original version was essentially a blob, masquerading as
+ * source code. This forked source code is therefore the result
+ * of extensive reverse engineering (of the GNU source code)!
+ * This cleaned up code and extensive commenting will thoroughly
+ * explain how the decoding works. This was done as an academic
+ * exercise in 2023, continuing in 2026.
+ *
+ * This fork of spkmodem-recv is provided with Libreboot releases:
+ * https://libreboot.org/
+ */
 
 #define _POSIX_SOURCE
 
@@ -24,22 +60,95 @@
 #include <string.h>
 #include <unistd.h>
 
+/*
+ * spkmodem is essentially using FSK (Frequency Shift Keying)
+ * with two primary tones representing encoded bits,
+ * separated by a framing tone.
+ * Very cheap on CPU cycles and avoids needing something more
+ * expensive like FFT or Goertzel filters, and tolerates
+ * weak/noisy signals.
+ */
+
+/*
+ * Sampling rate of the input audio stream, in Hz.
+ */
+#define SAMPLE_RATE 48000
+
+/*
+ * One analysis frame spans 5 ms.
+ *
+ *   frame_time = SAMPLES_PER_FRAME / SAMPLE_RATE
+ *
+ * With the default sample rate (48 kHz):
+ *
+ *   frame_time = N / 48000
+ *   0.005 s = N / 48000
+ *   N = 0.005 × 48000 = 240 samples
+ */
 #define SAMPLES_PER_FRAME 240
-#define MAX_SAMPLES (2 * SAMPLES_PER_FRAME)
 
-#define FREQ_SEP_MIN 5
-#define FREQ_SEP_MAX 15
+/*
+ * Number of analysis frames per second.
+ *
+ * Each increment in the frequency counters corresponds
+ * roughly to this many Hertz of tone frequency.
+ *
+ * With the default values:
+ *   FRAME_RATE = 48000 / 240 = 200 Hz
+ */
+#define FRAME_RATE ((SAMPLE_RATE) / (SAMPLES_PER_FRAME))
 
-#define FREQ_DATA_MIN 15
-#define FREQ_DATA_THRESHOLD 25
-#define FREQ_DATA_MAX 60
+/*
+ * Two FIR windows are maintained; one for data tone,
+ * and one for the separator tone. They are positioned
+ * one frame apart in the ring buffer.
+ */
+#define MAX_SAMPLES (2 * (SAMPLES_PER_FRAME))
+
+/*
+ * Approx byte offset for ring buffer span, just for
+ * easier debug output correlating to the audio stream.
+ */
+#define SAMPLE_OFFSET ((MAX_SAMPLES) * (sizeof(short)))
 
+/*
+ * Expected tone ranges (approximate, derived from spkmodem).
+ * These values are intentionally wide because real-world setups
+ * often involve microphones, room acoustics, and cheap ADCs.
+ */
+#define SEP_TONE_MIN_HZ 1000
+#define SEP_TONE_MAX_HZ 3000
+
+#define DATA_TONE_MIN_HZ 3000
+#define DATA_TONE_MAX_HZ 12000
+
+/* Cut-off frequency used to distinguish the two data tones. */
+#define DATA_TONE_THRESHOLD_HZ 5000
+
+/*
+ * Convert tone frequency ranges into pulse counts within the
+ * sliding analysis window.
+ *
+ * pulse_count ≈ tone_frequency / FRAME_RATE
+ * where FRAME_RATE = SAMPLE_RATE / SAMPLES_PER_FRAME.
+ */
+#define FREQ_SEP_MIN ((SEP_TONE_MIN_HZ) / (FRAME_RATE))
+#define FREQ_SEP_MAX ((SEP_TONE_MAX_HZ) / (FRAME_RATE))
+
+#define FREQ_DATA_MIN ((DATA_TONE_MIN_HZ) / (FRAME_RATE))
+#define FREQ_DATA_MAX ((DATA_TONE_MAX_HZ) / (FRAME_RATE))
+
+#define FREQ_DATA_THRESHOLD ((DATA_TONE_THRESHOLD_HZ) / (FRAME_RATE))
+
+/*
+ * Sample amplitude threshold used to convert the waveform
+ * into a pulse stream. Values near zero are regarded as noise.
+ */
 #define THRESHOLD 500
 
 #define READ_BUF 4096
 
 struct decoder_state {
-	signed short frame[MAX_SAMPLES];
 	unsigned char pulse[MAX_SAMPLES];
 
 	signed short inbuf[READ_BUF];
@@ -61,10 +170,12 @@ struct decoder_state {
 	unsigned char ascii;
 
 	int debug;
+	int swap_bytes;
 };
 
 static const char *argv0;
 
+static int host_is_big_endian(void);
 static void handle_audio(struct decoder_state *st);
 static int valid_signal(struct decoder_state *st);
 static void decode_pulse(struct decoder_state *st);
@@ -84,10 +195,6 @@ extern int optind;
 extern int opterr;
 extern int optopt;
 
-#ifndef errno
-extern int errno;
-#endif
-
 int
 main(int argc, char **argv)
 {
@@ -110,15 +217,15 @@ main(int argc, char **argv)
 	argv0 = argv[0];
 
 	while ((c = getopt(argc, argv, "d")) != -1) {
-		switch (c) {
-		case 'd':
-			st.debug = 1;
-			break;
-		default:
+		if (c != 'd')
 			usage();
-		}
+		/* No break here: every remaining option must be checked too. */
+		st.debug = 1;
 	}
 
+	if (host_is_big_endian())
+		st.swap_bytes = 1;
+
 	setvbuf(stdout, NULL, _IONBF, 0);
 
 	for (;;)
@@ -127,29 +234,42 @@ main(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+static int
+host_is_big_endian(void)
+{
+	const unsigned int probe = 1;
+	return (((const unsigned char *)&probe)[0] == 0);
+}
+
 static void
 handle_audio(struct decoder_state *st)
 {
 	int sample;
 
+	/*
+	 * No bit accepted for more than three frames' worth of samples:
+	 * discard the partially assembled character.
+	 */
 	if (st->sample_count > (3 * SAMPLES_PER_FRAME))
 		reset_char(st);
+	if (!valid_signal(st)) {
+		decode_pulse(st);
+		return;
+	}
 
-	if (valid_signal(st)) {
-
-		if (set_ascii_bit(st) < 0)
-			print_char(st);
-
-		st->sample_count = 0;
-
-		for (sample = 0; sample < SAMPLES_PER_FRAME; sample++)
-			decode_pulse(st);
+	if (set_ascii_bit(st) < 0)
+		print_char(st);
 
-	} else {
+	st->sample_count = 0;
+	for (sample = 0; sample < SAMPLES_PER_FRAME; sample++)
 		decode_pulse(st);
-	}
 }
 
+/*
+ * Verify that the observed pulse densities fall within the
+ * expected ranges for spkmodem tones. This prevents random noise
+ * from being misinterpreted as data.
+ */
 static int
 valid_signal(struct decoder_state *st)
 {
@@ -159,37 +279,82 @@ valid_signal(struct decoder_state *st)
 	    st->freq_data < FREQ_DATA_MAX);
 }
 
+/*
+ * Main demodulation step (moving-sum FIR filter).
+ */
 static void
 decode_pulse(struct decoder_state *st)
 {
 	unsigned char old_ring, old_sep;
 	unsigned char new_pulse;
+	int ringpos;
+	int sep_pos;
+	signed short sample;
 
-	old_ring = st->pulse[st->ringpos];
-	old_sep = st->pulse[st->sep_pos];
+	ringpos = st->ringpos;
+	sep_pos = st->sep_pos;
 
+	/*
+	 * Sliding rectangular FIR (Finite Impulse Response) filter.
+	 *
+	 * After thresholding, the signal becomes a stream of 0/1 pulses.
+	 * The decoder measures pulse density over two windows:
+	 *
+	 * freq_data: pulses in the "data" window
+	 * freq_separator: pulses in the "separator" window
+	 *
+	 * Instead of re-summing each window every time (O(N) per sample), we
+	 * update the window sums incrementally:
+	 *
+	 *   sum_new = sum_old - pulse_leaving + pulse_entering
+	 *
+	 * This keeps the filter O(1) per sample instead of O(N).
+	 * The technique is equivalent to a rectangular FIR filter
+	 * implemented as a sliding moving sum.
+	 *
+	 * Both positions advance in lockstep: the pulse leaving the
+	 * separator window (old_sep) enters the data window, and the
+	 * slot at ringpos (old_ring) is overwritten by the new pulse.
+	 */
+	old_ring = st->pulse[ringpos];
+	old_sep  = st->pulse[sep_pos];
 	st->freq_data -= old_ring;
 	st->freq_data += old_sep;
 	st->freq_separator -= old_sep;
 
-	st->frame[st->ringpos] = read_sample(st);
+	sample = read_sample(st);
 
-	if (st->frame[st->ringpos] > THRESHOLD ||
-	    st->frame[st->ringpos] < -THRESHOLD)
+	/*
+	 * Convert the waveform sample into a pulse (0 or 1).
+	 *
+	 * One unsigned comparison tests |sample| > THRESHOLD: a negative
+	 * sum wraps to a huge unsigned value. Samples inside the dead
+	 * zone around zero (microphone or ADC noise) yield no pulse;
+	 * louder samples yield a pulse.
+	 */
+	if ((unsigned)(sample + THRESHOLD)
+	    > (unsigned)(2 * THRESHOLD))
 		new_pulse = 1;
 	else
 		new_pulse = 0;
 
-	st->pulse[st->ringpos] = new_pulse;
+	st->pulse[ringpos] = new_pulse;
 	st->freq_separator += new_pulse;
 
-	st->ringpos++;
-	if (st->ringpos >= MAX_SAMPLES)
-		st->ringpos = 0;
+	/*
+	 * Advance both FIR windows through the ring buffer.
+	 * The separator window always stays one frame ahead
+	 * of the data window.
+	 */
+	ringpos++;
+	if (ringpos >= MAX_SAMPLES)
+		ringpos = 0;
+	sep_pos++;
+	if (sep_pos >= MAX_SAMPLES)
+		sep_pos = 0;
 
-	st->sep_pos++;
-	if (st->sep_pos >= MAX_SAMPLES)
-		st->sep_pos = 0;
+	st->ringpos = ringpos;
+	st->sep_pos = sep_pos;
 
 	st->sample_count++;
 }
@@ -198,6 +363,8 @@ static signed short
 read_sample(struct decoder_state *st)
 {
 	size_t n;
+	signed short sample;
+	unsigned short u;
 
 	while (st->inpos >= st->inlen) {
 
@@ -205,29 +372,41 @@ read_sample(struct decoder_state *st)
 		    READ_BUF, stdin);
 
 		if (n == 0) {
+			if (ferror(stdin))
+				err(errno, "stdin read");
 			if (feof(stdin))
 				exit(EXIT_SUCCESS);
-			err(errno, "stdin read");
 		}
 
 		st->inpos = 0;
 		st->inlen = n;
 	}
 
-	return st->inbuf[st->inpos++];
+	sample = st->inbuf[st->inpos++];
+
+	if (st->swap_bytes) {
+		u = (unsigned short)sample;
+		u = (u >> 8) | (u << 8);
+
+		sample = (signed short)u;
+	}
+
+	return sample;
 }
 
+/*
+ * Record one bit per validated frame (set when freq_data is below
+ * FREQ_DATA_THRESHOLD), highest index first; returns the next index.
+ */
 static int
 set_ascii_bit(struct decoder_state *st)
 {
 	if (st->debug)
 		print_stats(st);
-
 	if (st->freq_data < FREQ_DATA_THRESHOLD)
 		st->ascii |= (1 << st->ascii_bit);
 
 	st->ascii_bit--;
-
 	return st->ascii_bit;
 }
 
@@ -247,9 +426,7 @@ print_stats(struct decoder_state *st)
 {
 	long pos;
 
-	pos = ftell(stdin);
-
-	if (pos == -1) {
+	if ((pos = ftell(stdin)) == -1) {
 		printf("%d %d %d\n",
 		    st->freq_data,
 		    st->freq_separator,
@@ -261,7 +438,7 @@ print_stats(struct decoder_state *st)
 	    st->freq_data,
 	    st->freq_separator,
 	    FREQ_DATA_THRESHOLD,
-	    pos - sizeof(st->frame));
+	    pos - SAMPLE_OFFSET);
 }
 
 static void
@@ -282,10 +459,7 @@ err(int errval, const char *msg, ...)
 	vfprintf(stderr, msg, ap);
 	va_end(ap);
 
-	if (!errno)
-		errno = errval;
-
-	fprintf(stderr, ": %s\n", strerror(errno));
+	fprintf(stderr, ": %s\n", strerror(errval));
 	exit(EXIT_FAILURE);
 }