summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- util/spkmodem_recv/spkmodem-recv.c 123
1 files changed, 102 insertions, 21 deletions
diff --git a/util/spkmodem_recv/spkmodem-recv.c b/util/spkmodem_recv/spkmodem-recv.c
index a1ea0fd6..07937387 100644
--- a/util/spkmodem_recv/spkmodem-recv.c
+++ b/util/spkmodem_recv/spkmodem-recv.c
@@ -4,8 +4,22 @@
* Copyright (c) 2023, 2026 Leah Rowe <leah@libreboot.org>
*
* This program receives text encoded as pulses on the PC speaker,
- * and decodes them. This is a special type of interface provided
- * by coreboot and GRUB, for computers that lack serial ports.
+ * and decodes them via simple FSK (Frequency Shift Keying)
+ * demodulation, using an FIR (Finite Impulse Response)
+ * frequency discriminator.
+ *
+ * It waits for specific tones at specific intervals.
+ * It detects tones within the audio stream and reconstructs
+ * characters bit-by-bit as the encoded modem signal is received.
+ * This is performance-efficient on most CPUs, and has relatively
+ * high tolerance for noisy signals (similar to techniques used
+ * for data stored on audio cassette tapes).
+ *
+ * This is a special interface provided by coreboot and GNU GRUB,
+ * for computers that lack serial ports (useful for debugging).
+ * Note that GRUB and coreboot can both send these signals; this
+ * tool merely decodes them. This tool does not *encode*, only
+ * decode.
*
* Usage example (NOTE: little endian!):
* parec --channels=1 --rate=48000 --format=s16le | ./spkmodem-recv
@@ -14,7 +28,16 @@
* modified fork that complies with the OpenBSD Kernel Source
* File Style Guide (KNF) instead of GNU coding standards; it
* emphasises strict error handling, portability and code
- * quality, as characterised by OpenBSD projects.
+ * quality, as characterised by OpenBSD projects. Several magic
+ * numbers have been tidied up, calculated (not hardcoded) and
+ * thoroughly explained, unlike in the original version.
+ *
+ * The original version was essentially a blob, masquerading as
+ * source code. This forked source code is therefore the result
+ * of extensive reverse engineering (of the GNU source code)!
+ * This cleaned up code and extensive commenting will thoroughly
+ * explain how the decoding works. This was done as an academic
+ * exercise in 2023, continuing in 2026.
*
* This fork of spkmodem-recv is provided with Libreboot releases:
* https://libreboot.org/
@@ -38,11 +61,11 @@
#include <unistd.h>
/*
- * spkmodem is essentially used FSK (Frequency Shift Keying)
- * with two frequencies: tone A (bit 0) and tone B (bit 1),
- * detected via pulse density inside a given window.
- * Very cheap on CPU cycles and avoids neeing something more
- * complicated like FFT or Goertzel filters, and tolerates
+ * spkmodem essentially uses FSK (Frequency Shift Keying)
+ * with two primary tones representing encoded bits,
+ * separated by a framing tone. This is very cheap on
+ * CPU cycles, avoids needing something more expensive
+ * like FFT or Goertzel filters, and tolerates
* weak/noisy signals.
*/
@@ -52,17 +75,31 @@
#define SAMPLE_RATE 48000
/*
- * A frame is 5ms, so samples
- * per frame is N / 48000 = 0.005 (5ms)
- * => N = 0.005 * 48000 = 240
+ * One analysis frame spans 5 ms.
+ *
+ * frame_time = SAMPLES_PER_FRAME / SAMPLE_RATE
+ *
+ * With the default sample rate (48 kHz):
+ *
+ * frame_time = N / 48000
+ * 0.005 s = N / 48000
+ * N = 0.005 × 48000 = 240 samples
*/
#define SAMPLES_PER_FRAME 240
-/* = 48000 / 240 = 200 Hz resolution */
+/*
+ * Number of analysis frames per second.
+ *
+ * Each increment in the frequency counters corresponds
+ * roughly to this many Hertz of tone frequency.
+ *
+ * With the default values:
+ * FRAME_RATE = 48000 / 240 = 200 Hz
+ */
#define FRAME_RATE ((SAMPLE_RATE) / (SAMPLES_PER_FRAME))
/*
- * Two FIR windows are maintained; one for data done,
+ * Two FIR windows are maintained; one for data tone,
* and one for the separator tone. They are positioned
* one frame apart in the ring buffer.
*/
@@ -89,7 +126,9 @@
#define DATA_TONE_THRESHOLD_HZ 5000
/*
- * Convert tone frequencies within the sliding window, into pulse counts
+ * Convert tone frequency ranges into pulse counts within the
+ * sliding analysis window.
+ *
* pulse_count ≈ tone_frequency / FRAME_RATE
* where FRAME_RATE = SAMPLE_RATE / SAMPLES_PER_FRAME.
*/
@@ -103,7 +142,7 @@
/*
* Sample amplitude threshold used to convert the waveform
- * into a pulse stream. Values near zero regarded as noise.
+ * into a pulse stream. Values near zero are regarded as noise.
*/
#define THRESHOLD 500
@@ -207,6 +246,10 @@ handle_audio(struct decoder_state *st)
{
int sample;
+ /*
+ * If the modem signal disappears for several frames,
+ * discard the partially assembled character.
+ */
if (st->sample_count > (3 * SAMPLES_PER_FRAME))
reset_char(st);
if (!valid_signal(st)) {
@@ -222,6 +265,11 @@ handle_audio(struct decoder_state *st)
decode_pulse(st);
}
+/*
+ * Verify that the observed pulse densities fall within the
+ * expected ranges for spkmodem tones. This prevents random noise
+ * from being misinterpreted as data.
+ */
static int
valid_signal(struct decoder_state *st)
{
@@ -231,6 +279,9 @@ valid_signal(struct decoder_state *st)
st->freq_data < FREQ_DATA_MAX);
}
+/*
+ * Main demodulation step (moving-sum FIR filter).
+ */
static void
decode_pulse(struct decoder_state *st)
{
@@ -244,11 +295,29 @@ decode_pulse(struct decoder_state *st)
sep_pos = st->sep_pos;
/*
- * Moving sum of the last N
- * pulses; O(1) per sample.
+ * Sliding rectangular FIR (Finite Impulse Response) filter.
+ *
+ * After thresholding, the signal becomes a stream of 0/1 pulses.
+ * The decoder measures pulse density over two windows:
+ *
+ * freq_data: pulses in the "data" window
+ * freq_separator: pulses in the "separator" window
+ *
+ * Instead of recalculating each window for every sample (O(N) per
+ * sample), we update the window sums incrementally:
+ *
+ * sum_new = sum_old - pulse_leaving + pulse_entering
+ *
+ * This keeps the filter O(1) per sample instead of O(N).
+ * The technique is equivalent to a rectangular FIR filter
+ * implemented as a sliding moving sum.
+ *
+ * The two windows are exactly SAMPLES_PER_FRAME apart in the ring
+ * buffer, so the pulse leaving the separator window is simultaneously
+ * entering the data window.
*/
old_ring = st->pulse[ringpos];
- old_sep = st->pulse[sep_pos];
+ old_sep = st->pulse[sep_pos];
st->freq_data -= old_ring;
st->freq_data += old_sep;
st->freq_separator -= old_sep;
@@ -256,8 +325,12 @@ decode_pulse(struct decoder_state *st)
sample = read_sample(st);
/*
- * Noise suppression, with a frequency
- * tolerancy defined by THRESHOLD.
+ * Convert the waveform sample into a pulse (0 or 1).
+ *
+ * The unsigned comparison creates a small dead zone near zero,
+ * suppressing small amplitude noise from microphones or
+ * cheap ADCs. Real PC speaker tones are far outside this
+ * range, so they still produce clean pulses.
*/
if ((unsigned)(sample + THRESHOLD)
> (unsigned)(2 * THRESHOLD))
@@ -268,10 +341,14 @@ decode_pulse(struct decoder_state *st)
st->pulse[ringpos] = new_pulse;
st->freq_separator += new_pulse;
+ /*
+ * Advance both FIR windows through the ring buffer.
+ * The separator window always stays one frame ahead
+ * of the data window.
+ */
ringpos++;
if (ringpos >= MAX_SAMPLES)
ringpos = 0;
-
sep_pos++;
if (sep_pos >= MAX_SAMPLES)
sep_pos = 0;
@@ -317,6 +394,10 @@ read_sample(struct decoder_state *st)
return sample;
}
+/*
+ * Each validated frame contributes one bit of modem data.
+ * Bits are accumulated MSB-first into the ASCII byte.
+ */
static int
set_ascii_bit(struct decoder_state *st)
{