diff options
Diffstat (limited to 'util')
| -rw-r--r-- | util/spkmodem_recv/spkmodem-recv.c | 123 |
1 file changed, 102 insertions, 21 deletions
diff --git a/util/spkmodem_recv/spkmodem-recv.c b/util/spkmodem_recv/spkmodem-recv.c index a1ea0fd6..07937387 100644 --- a/util/spkmodem_recv/spkmodem-recv.c +++ b/util/spkmodem_recv/spkmodem-recv.c @@ -4,8 +4,22 @@ * Copyright (c) 2023, 2026 Leah Rowe <leah@libreboot.org> * * This program receives text encoded as pulses on the PC speaker, - * and decodes them. This is a special type of interface provided - * by coreboot and GRUB, for computers that lack serial ports. + * and decodes them via simple FSK (Frequency Shift Keying) + * demodulation and FIR (Finite Impulse Response) frequency + * discriminator. + * + * It waits for specific tones at specific intervals. + * It detects tones within the audio stream and reconstructs + * characters bit-by-bit as the encoded modem signal is received. + * This is performance-efficient on most CPUs, and has relatively + * high tolerance for noisy signals (similar to techniques used + * for data stored on audio cassette tapes). + + * This is a special interface provided by coreboot and GNU GRUB, + * for computers that lack serial ports (useful for debugging). + * Note that GRUB and coreboot can both send these signals; this + * tool merely decodes them. This tool does not *encode*, only + * decode. * * Usage example (NOTE: little endian!): * parec --channels=1 --rate=48000 --format=s16le | ./spkmodem-recv @@ -14,7 +28,16 @@ * modified fork that complies with the OpenBSD Kernel Source * File Style Guide (KNF) instead of GNU coding standards; it * emphasises strict error handling, portability and code - * quality, as characterised by OpenBSD projects. + * quality, as characterised by OpenBSD projects. Several magic + * numbers have been tidied up, calculated (not hardcoded) and + * thoroughly explained, unlike in the original version. + * + * The original version was essentially a blob, masquerading as + * source code. This forked source code is therefore the result + * of extensive reverse engineering (of the GNU source code)! 
+ * This cleaned up code and extensive commenting will thoroughly + * explain how the decoding works. This was done as an academic + * exercise in 2023, continuing in 2026. * * This fork of spkmodem-recv is provided with Libreboot releases: * https://libreboot.org/ @@ -38,11 +61,11 @@ #include <unistd.h> /* - * spkmodem is essentially used FSK (Frequency Shift Keying) - * with two frequencies: tone A (bit 0) and tone B (bit 1), - * detected via pulse density inside a given window. - * Very cheap on CPU cycles and avoids neeing something more - * complicated like FFT or Goertzel filters, and tolerates + * spkmodem is essentially using FSK (Frequency Shift Keying) + * with two primary tones representing encoded bits, + * separated by a framing tone. + * Very cheap on CPU cycles and avoids needing something more + * expensive like FFT or Goertzel filters, and tolerates * weak/noisy signals. */ @@ -52,17 +75,31 @@ #define SAMPLE_RATE 48000 /* - * A frame is 5ms, so samples - * per frame is N / 48000 = 0.005 (5ms) - * => N = 0.005 * 48000 = 240 + * One analysis frame spans 5 ms. + * + * frame_time = SAMPLES_PER_FRAME / SAMPLE_RATE + * + * With the default sample rate (48 kHz): + * + * frame_time = N / 48000 + * 0.005 s = N / 48000 + * N = 0.005 × 48000 = 240 samples */ #define SAMPLES_PER_FRAME 240 -/* = 48000 / 240 = 200 Hz resolution */ +/* + * Number of analysis frames per second. + * + * Each increment in the frequency counters corresponds + * roughly to this many Hertz of tone frequency. + * + * With the default values: + * FRAME_RATE = 48000 / 240 = 200 Hz + */ #define FRAME_RATE ((SAMPLE_RATE) / (SAMPLES_PER_FRAME)) /* - * Two FIR windows are maintained; one for data done, + * Two FIR windows are maintained; one for data tone, * and one for the separator tone. They are positioned * one frame apart in the ring buffer. 
*/ @@ -89,7 +126,9 @@ #define DATA_TONE_THRESHOLD_HZ 5000 /* - * Convert tone frequencies within the sliding window, into pulse counts + * Convert tone frequency ranges into pulse counts within the + * sliding analysis window. + * * pulse_count ≈ tone_frequency / FRAME_RATE * where FRAME_RATE = SAMPLE_RATE / SAMPLES_PER_FRAME. */ @@ -103,7 +142,7 @@ /* * Sample amplitude threshold used to convert the waveform - * into a pulse stream. Values near zero regarded as noise. + * into a pulse stream. Values near zero are regarded as noise. */ #define THRESHOLD 500 @@ -207,6 +246,10 @@ handle_audio(struct decoder_state *st) { int sample; + /* + * If the modem signal disappears for several frames, + * discard the partially assembled character. + */ if (st->sample_count > (3 * SAMPLES_PER_FRAME)) reset_char(st); if (!valid_signal(st)) { @@ -222,6 +265,11 @@ handle_audio(struct decoder_state *st) decode_pulse(st); } +/* + * Verify that the observed pulse densities fall within the + * expected ranges for spkmodem tones. This prevents random noise + * from being misinterpreted as data. + */ static int valid_signal(struct decoder_state *st) { @@ -231,6 +279,9 @@ valid_signal(struct decoder_state *st) st->freq_data < FREQ_DATA_MAX); } +/* + * Main demodulation step (moving-sum FIR filter). + */ static void decode_pulse(struct decoder_state *st) { @@ -244,11 +295,29 @@ decode_pulse(struct decoder_state *st) sep_pos = st->sep_pos; /* - * Moving sum of the last N - * pulses; O(1) per sample. + * Sliding rectangular FIR (Finite Impulse Response) filter. + * + * After thresholding, the signal becomes a stream of 0/1 pulses. 
+ * The decoder measures pulse density over two windows: + * + * freq_data: pulses in the "data" window + * freq_separator: pulses in the "separator" window + * + * Instead of calculating each window every time (O(N) per sample), we + * update the window sums incrementally: + * + * sum_new = sum_old - pulse_leaving + pulse_entering + * + * This keeps the filter O(1) per sample instead of O(N). + * The technique is equivalent to a rectangular FIR filter + * implemented as a sliding moving sum. + * + * The two windows are exactly SAMPLES_PER_FRAME apart in the ring + * buffer, so the pulse leaving the separator window is simultaneously + * entering the data window. */ old_ring = st->pulse[ringpos]; - old_sep = st->pulse[sep_pos]; + old_sep = st->pulse[sep_pos]; st->freq_data -= old_ring; st->freq_data += old_sep; st->freq_separator -= old_sep; @@ -256,8 +325,12 @@ decode_pulse(struct decoder_state *st) sample = read_sample(st); /* - * Noise suppression, with a frequency - * tolerancy defined by THRESHOLD. + * Convert the waveform sample into a pulse (0 or 1). + * + * The unsigned comparison creates a small dead zone near zero, + * suppressing small amplitude noise from microphones or + * cheap ADCs. Real PC speaker tones are far outside this + * range, so they still produce clean pulses. */ if ((unsigned)(sample + THRESHOLD) > (unsigned)(2 * THRESHOLD)) @@ -268,10 +341,14 @@ decode_pulse(struct decoder_state *st) st->pulse[ringpos] = new_pulse; st->freq_separator += new_pulse; + /* + * Advance both FIR windows through the ring buffer. + * The separator window always stays one frame ahead + * of the data window. + */ ringpos++; if (ringpos >= MAX_SAMPLES) ringpos = 0; - sep_pos++; if (sep_pos >= MAX_SAMPLES) sep_pos = 0; @@ -317,6 +394,10 @@ read_sample(struct decoder_state *st) return sample; } +/* + * Each validated frame contributes one bit of modem data. + * Bits are accumulated MSB-first into the ASCII byte. 
+ */ static int set_ascii_bit(struct decoder_state *st) { |
