/*
 * SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright (c) 2013 Free Software Foundation, Inc.
 * Copyright (c) 2023, 2026 Leah Rowe
 *
 * This program receives text encoded as pulses on the PC speaker,
 * and decodes them via simple FSK (Frequency Shift Keying)
 * demodulation and FIR (Finite Impulse Response) frequency
 * discriminator.
 *
 * It waits for specific tones at specific intervals.
 * It detects tones within the audio stream and reconstructs
 * characters bit-by-bit as the encoded modem signal is received.
 * This is performance-efficient on most CPUs, and has relatively
 * high tolerance for noisy signals (similar to techniques used
 * for data stored on audio cassette tapes).
 *
 * This is a special interface provided by coreboot and GNU GRUB,
 * for computers that lack serial ports (useful for debugging).
 * Note that GRUB and coreboot can both send these signals; this
 * tool merely decodes them. This tool does not *encode*, only
 * decode.
 *
 * Usage example (NOTE: little endian!):
 * parec --channels=1 --rate=48000 --format=s16le | ./spkmodem-decode
 *
 * Originally provided by GNU GRUB, this version is a heavily
 * modified fork that complies with the OpenBSD Kernel Source
 * File Style Guide (KNF) instead of GNU coding standards; it
 * emphasises strict error handling, portability and code
 * quality, as characterised by OpenBSD projects. Several magic
 * numbers have been tidied up, calculated (not hardcoded) and
 * thoroughly explained, unlike in the original version.
 *
 * The original version was essentially a blob, masquerading as
 * source code. This forked source code is therefore the result
 * of extensive reverse engineering (of the GNU source code)!
 * This cleaned up code and extensive commenting will thoroughly
 * explain how the decoding works. This was done as an academic
 * exercise in 2023, continuing in 2026.
 *
 * This fork of spkmodem-recv, called spkmodem-decode, is provided
 * with Libreboot releases:
 * https://libreboot.org/
 *
 * The original GNU version is here, if you're morbidly curious:
 * https://cgit.git.savannah.gnu.org/cgit/grub.git/plain/util/spkmodem-recv.c?id=3dce38eb196f47bdf86ab028de74be40e13f19fd
 *
 * Libreboot's version was renamed to spkmodem-decode on 12 March 2026,
 * since Libreboot's version no longer closely resembles the GNU
 * version at all; ergo, a full rename was in order. GNU's version
 * was called spkmodem-recv.
 */

#define _POSIX_SOURCE

/*
 * For OpenBSD define, to detect version
 * for deciding whether to use pledge(2)
 */
#ifdef __OpenBSD__
#include <sys/param.h>
#endif

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * spkmodem is essentially using FSK (Frequency Shift Keying)
 * with two primary tones representing encoded bits,
 * separated by a framing tone.
 * Very cheap on CPU cycles and avoids needing something more
 * expensive like FFT or Goertzel filters, and tolerates
 * weak/noisy signals.
 */

/*
 * Frequency of audio in Hz
 * WARNING: if changing, make sure to adjust
 * SAMPLES_PER_FRAME accordingly (see maths below)
 */
#define SAMPLE_RATE 48000

/*
 * One analysis frame spans 5 ms.
 *
 * frame_time = SAMPLES_PER_FRAME / SAMPLE_RATE
 *
 * With the default sample rate (48 kHz):
 *
 * frame_time = N / 48000
 * 0.005 s = N / 48000
 * N = 0.005 × 48000 = 240 samples
 */
#define SAMPLES_PER_FRAME 240

/*
 * Number of analysis frames per second.
 *
 * Each increment in the frequency counters corresponds
 * roughly to this many Hertz of tone frequency.
 *
 * With the default values:
 * FRAME_RATE = 48000 / 240 = 200 Hz
 */
#define FRAME_RATE ((SAMPLE_RATE) / (SAMPLES_PER_FRAME))

/*
 * Two FIR windows are maintained; one for data tone,
 * and one for the separator tone. They are positioned
 * one frame apart in the ring buffer.
 */
#define MAX_SAMPLES (2 * (SAMPLES_PER_FRAME))

/*
 * Approx byte offset for ring buffer span, just for
 * easier debug output correlating to the audio stream.
 */
#define SAMPLE_OFFSET ((MAX_SAMPLES) * (sizeof(short)))

/*
 * Expected tone ranges (approximate, derived from spkmodem).
 * These values are intentionally wide because real-world setups
 * often involve microphones, room acoustics, and cheap ADCs.
 */
#define SEP_TONE_MIN_HZ 1000
#define SEP_TONE_MAX_HZ 3000

/*
 * Half-width, in pulses, of the separator window that is applied
 * around the measured average once calibration has completed.
 * With the default values: (3000 - 1000) / (2 * 200) = 5 pulses.
 */
#define SEP_TOLERANCE_PULSES \
	(((SEP_TONE_MAX_HZ) - (SEP_TONE_MIN_HZ)) / (2 * (FRAME_RATE)))

/*
 * Number of separator readings that are averaged before the
 * separator window is re-centred (see handle_audio).
 */
#define SEP_CALIBRATION_SAMPLES 50

#define DATA_TONE_MIN_HZ 3000
#define DATA_TONE_MAX_HZ 12000

/* Mid point used to distinguish the two data tones. */
#define DATA_TONE_THRESHOLD_HZ 5000

/*
 * Convert tone frequency ranges into pulse counts within the
 * sliding analysis window.
 *
 * pulse_count ≈ tone_frequency / FRAME_RATE
 * where FRAME_RATE = SAMPLE_RATE / SAMPLES_PER_FRAME.
 */
#define FREQ_SEP_MIN ((SEP_TONE_MIN_HZ) / (FRAME_RATE))
#define FREQ_SEP_MAX ((SEP_TONE_MAX_HZ) / (FRAME_RATE))

#define FREQ_DATA_MIN ((DATA_TONE_MIN_HZ) / (FRAME_RATE))
#define FREQ_DATA_MAX ((DATA_TONE_MAX_HZ) / (FRAME_RATE))

#define FREQ_DATA_THRESHOLD ((DATA_TONE_THRESHOLD_HZ) / (FRAME_RATE))

/*
 * These determine how long the program will wait during
 * tone auto-detection, before shifting to defaults.
 *
 * For tone auto-detection (time waiting for detection)
 * NOTE: you could multiply SAMPLES_PER_FRAME instead
 * of SAMPLE_RATE in LEARN_SAMPLES, for more granularity.
 * Here, 1 * SAMPLE_RATE represents 1 second, which seems
 * like a reasonable, conservative default wait time.
 */
#define LEARN_SECONDS 1
#define LEARN_SAMPLES ((LEARN_SECONDS) * (SAMPLE_RATE))

/*
 * Sample amplitude threshold used to convert the waveform
 * into a pulse stream. Values near zero are regarded as noise.
 */
#define THRESHOLD 500

/* Number of samples requested from stdin per fread(3) call. */
#define READ_BUF 4096

struct decoder_state {
	/* ring buffer of thresholded samples (each entry is 0 or 1) */
	unsigned char pulse[MAX_SAMPLES];

	/* buffered raw input samples, and the read cursor into them */
	signed short inbuf[READ_BUF];
	size_t inpos;
	size_t inlen;

	/* ring buffer cursors; sep_pos starts one frame after ringpos */
	int ringpos;
	int sep_pos;

	/*
	 * Sliding window pulse counters
	 * used to detect modem tones
	 */
	int freq_data;
	int freq_separator;

	/* samples processed since the last validated frame */
	int sample_count;

	/* next bit position to fill (7 down to 0), and the byte so far */
	int ascii_bit;
	unsigned char ascii;

	/* non-zero when -d was given */
	int debug;
	/* non-zero when input samples must be byte-swapped */
	int swap_bytes;

	/* dynamic separator calibration */
	int sep_sum;
	int sep_samples;
	int sep_min;
	int sep_max;

	/* for automatic tone detection */
	int freq_min;
	int freq_max;
	int freq_threshold;
	int learn_samples;
};

static const char *argv0;

static int host_is_big_endian(void);
static void handle_audio(struct decoder_state *st);
static int valid_signal(struct decoder_state *st);
static void decode_pulse(struct decoder_state *st);
static void auto_detect_tone(struct decoder_state *st);
static signed short read_sample(struct decoder_state *st);
static int set_ascii_bit(struct decoder_state *st);
static void print_char(struct decoder_state *st);
static void print_stats(struct decoder_state *st);
static void reset_char(struct decoder_state *st);

static void err(int errval, const char *msg, ...);
static void usage(void);
static const char *progname(void);

int getopt(int, char * const *, const char *);
extern char *optarg;
extern int optind;
extern int opterr;
extern int optopt;

/*
 * Entry point: parse the optional -d flag, initialise the decoder
 * state and then decode audio from stdin until read_sample()
 * terminates the process (end of input or read error).
 */
int
main(int argc, char **argv)
{
	struct decoder_state st;
	int c;

	argv0 = argv[0];

#if defined (__OpenBSD__) && defined(OpenBSD)
#if OpenBSD >= 509
	if (pledge("stdio", NULL) == -1)
		err(errno, "pledge");
#endif
#endif

	memset(&st, 0, sizeof(st));

	while ((c = getopt(argc, argv, "d")) != -1) {
		if (c != 'd')
			usage();
		st.debug = 1;
		break;
	}

	/*
	 * fallback in case tone detection fails
	 *
	 * freq_min starts above any reachable pulse count and
	 * freq_max starts at zero, so the first non-zero reading
	 * replaces both (see auto_detect_tone).
	 */
	st.freq_min = 100000;
	st.freq_max = 0;
	st.freq_threshold = FREQ_DATA_THRESHOLD;

	/*
	 * Used for separator calibration
	 */
	st.sep_min = FREQ_SEP_MIN;
	st.sep_max = FREQ_SEP_MAX;

	st.ascii_bit = 7;

	st.ringpos = 0;
	st.sep_pos = SAMPLES_PER_FRAME;

	/* the input stream is little endian (see usage example) */
	if (host_is_big_endian())
		st.swap_bytes = 1;

	setvbuf(stdout, NULL, _IONBF, 0);

	for (;;)
		handle_audio(&st);

	return EXIT_SUCCESS;
}

/*
 * Return non-zero when the first byte of an unsigned int holding
 * the value 1 is zero, i.e. the low-order byte is not stored first.
 */
static int
host_is_big_endian(void)
{
	unsigned int x = 1;

	return (*(unsigned char *)&x == 0);
}

/*
 * One decoding step.
 *
 * While the window counters do not describe a valid frame, the
 * stream is advanced one sample at a time and separator readings
 * are gathered for calibration. Once a valid frame is seen, one
 * bit is extracted (printing the character after its eighth bit)
 * and a whole frame of samples is consumed.
 */
static void
handle_audio(struct decoder_state *st)
{
	int avg;
	int sample;

	/*
	 * If the modem signal disappears for several frames,
	 * discard the partially assembled character.
	 */
	if (st->sample_count >= (3 * SAMPLES_PER_FRAME))
		reset_char(st);

	if (!valid_signal(st)) {
		/*
		 * collect separator tone statistics
		 * (and auto-adjust tolerances)
		 */
		if (st->sep_samples < SEP_CALIBRATION_SAMPLES &&
		    st->freq_separator > 0) {
			st->sep_sum += st->freq_separator;
			st->sep_samples++;

			if (st->sep_samples == SEP_CALIBRATION_SAMPLES) {
				avg = st->sep_sum / st->sep_samples;

				/*
				 * Window of +/- SEP_TOLERANCE_PULSES
				 * around the average (5 pulses with
				 * the default tone ranges).
				 */
				st->sep_min = avg - SEP_TOLERANCE_PULSES;
				st->sep_max = avg + SEP_TOLERANCE_PULSES;

				if (st->debug)
					printf("separator calibrated: %dHz\n",
					    avg * FRAME_RATE);
			}
		}

		decode_pulse(st);
		return;
	}

	if (set_ascii_bit(st) < 0)
		print_char(st);

	st->sample_count = 0;
	for (sample = 0; sample < SAMPLES_PER_FRAME; sample++)
		decode_pulse(st);
}

/*
 * Verify that the observed pulse densities fall within the
 * expected ranges for spkmodem tones. This prevents random noise
 * from being misinterpreted as data.
 *
 * Both ranges are exclusive at either end.
 */
static int
valid_signal(struct decoder_state *st)
{
	return (st->freq_separator > st->sep_min &&
	    st->freq_separator < st->sep_max &&
	    st->freq_data > FREQ_DATA_MIN &&
	    st->freq_data < FREQ_DATA_MAX);
}

/*
 * Main demodulation step (moving-sum FIR filter).
 *
 * Consumes exactly one input sample and updates both window
 * counters, the ring buffer cursors, the tone auto-detection
 * state and sample_count.
 */
static void
decode_pulse(struct decoder_state *st)
{
	unsigned char old_ring, old_sep;
	unsigned char new_pulse;
	int ringpos;
	int sep_pos;
	signed short sample;

	ringpos = st->ringpos;
	sep_pos = st->sep_pos;

	/*
	 * Sliding rectangular FIR (Finite Impulse Response) filter.
	 *
	 * After thresholding, the signal becomes a stream of 0/1 pulses.
	 * The decoder measures pulse density over two windows:
	 *
	 * freq_data: pulses in the "data" window
	 * freq_separator: pulses in the "separator" window
	 *
	 * Instead of calculating each window every time (O(N) per frame), we
	 * update the window sums incrementally:
	 *
	 * sum_new = sum_old - pulse_leaving + pulse_entering
	 *
	 * This keeps the filter O(1) per sample instead of O(N).
	 * The technique is equivalent to a rectangular FIR filter
	 * implemented as a sliding moving sum.
	 *
	 * The two windows are exactly SAMPLES_PER_FRAME apart in the ring
	 * buffer, so the pulse leaving the separator window (old_sep) is
	 * simultaneously entering the data window, while the pulse at
	 * ringpos (old_ring) leaves the data window and is overwritten
	 * by the newest pulse, which enters the separator window.
	 */
	old_ring = st->pulse[ringpos];
	old_sep = st->pulse[sep_pos];
	st->freq_data -= old_ring;
	st->freq_data += old_sep;
	st->freq_separator -= old_sep;

	sample = read_sample(st);

	/*
	 * Convert the waveform sample into a pulse (0 or 1).
	 *
	 * The unsigned comparison creates a small dead zone near zero,
	 * suppressing small amplitude noise from microphones or
	 * cheap ADCs. Real PC speaker tones are far outside this
	 * range, so they still produce clean pulses.
	 *
	 * Samples in [-THRESHOLD, THRESHOLD] map to 0; anything below
	 * -THRESHOLD wraps to a large unsigned value and maps to 1.
	 */
	if ((unsigned)(sample + THRESHOLD) > (unsigned)(2 * THRESHOLD))
		new_pulse = 1;
	else
		new_pulse = 0;

	st->pulse[ringpos] = new_pulse;
	st->freq_separator += new_pulse;

	/*
	 * Advance both FIR windows through the ring buffer.
	 * The separator window always stays one frame ahead
	 * of the data window.
	 */
	ringpos++;
	if (ringpos >= MAX_SAMPLES)
		ringpos = 0;
	sep_pos++;
	if (sep_pos >= MAX_SAMPLES)
		sep_pos = 0;

	st->ringpos = ringpos;
	st->sep_pos = sep_pos;

	/*
	 * Attempt to auto-detect spkmodem tone
	 */
	auto_detect_tone(st);

	st->sample_count++;
}

/*
 * Observe signal for LEARN_SAMPLES samples (e.g. 1 second)
 * to guess the correct data tone threshold.
 * The exact amount of time is determined by LEARN_SAMPLES
 * divided by SAMPLE_RATE, logically. For example, if
 * LEARN_SAMPLES were half of the SAMPLE_RATE, this
 * corresponds to roughly 500ms before timeout.
 *
 * If no non-zero data reading was seen during that time,
 * the threshold already stored in the state (the known good
 * default set by main) is left untouched.
 */
static void
auto_detect_tone(struct decoder_state *st)
{
	if (st->learn_samples >= LEARN_SAMPLES)
		return;

	if (st->freq_data > 0) {
		if (st->freq_data < st->freq_min)
			st->freq_min = st->freq_data;
		if (st->freq_data > st->freq_max)
			st->freq_max = st->freq_data;
	}

	st->learn_samples++;

	if (st->learn_samples == LEARN_SAMPLES) {
		/*
		 * freq_max is only raised above zero by a non-zero
		 * reading, which also lowers freq_min; if it is still
		 * zero nothing was learned, and averaging the initial
		 * sentinels would yield a meaningless threshold.
		 */
		if (st->freq_max > 0)
			st->freq_threshold =
			    (st->freq_min + st->freq_max) / 2;

		if (st->debug)
			printf("auto threshold: %dHz\n",
			    st->freq_threshold * FRAME_RATE);
	}
}

/*
 * Return the next 16-bit sample from stdin, refilling the input
 * buffer as needed and byte-swapping when swap_bytes is set.
 *
 * Does not return at end of input (exits with EXIT_SUCCESS)
 * or on a read error (reports it via err()).
 */
static signed short
read_sample(struct decoder_state *st)
{
	size_t n;
	signed short sample;
	unsigned short u;

	while (st->inpos >= st->inlen) {
		n = fread(st->inbuf, sizeof(st->inbuf[0]),
		    READ_BUF, stdin);

		if (n == 0) {
			if (ferror(stdin))
				err(errno, "stdin read");
			if (feof(stdin))
				exit(EXIT_SUCCESS);
		}

		st->inpos = 0;
		st->inlen = n;
	}

	sample = st->inbuf[st->inpos++];

	if (st->swap_bytes) {
		u = (unsigned short)sample;
		u = (u >> 8) | (u << 8);
		sample = (signed short)u;
	}

	return sample;
}

/*
 * Each validated frame contributes one bit of modem data.
 * Bits are accumulated MSB-first into the ASCII byte.
 *
 * A data count below the threshold sets the bit. Returns the
 * next bit position, which is negative once all eight bits of
 * the current character have been received.
 */
static int
set_ascii_bit(struct decoder_state *st)
{
	if (st->debug)
		print_stats(st);

	if (st->freq_data < st->freq_threshold)
		st->ascii |= (1 << st->ascii_bit);

	st->ascii_bit--;

	return st->ascii_bit;
}

/*
 * Emit the assembled character (annotated with its hex value in
 * debug mode) and start assembling a new one.
 */
static void
print_char(struct decoder_state *st)
{
	if (st->debug)
		printf("<%c,%x>", st->ascii, st->ascii);
	else
		putchar(st->ascii);

	reset_char(st);
}

/*
 * Debug output: print the raw window counters, the data threshold
 * and their frequency equivalents. The stream position is included
 * only when ftell(3) on stdin succeeds.
 */
static void
print_stats(struct decoder_state *st)
{
	long pos;

	int data_hz = st->freq_data * FRAME_RATE;
	int sep_hz = st->freq_separator * FRAME_RATE;
	int sep_hz_min = st->sep_min * FRAME_RATE;
	int sep_hz_max = st->sep_max * FRAME_RATE;

	if ((pos = ftell(stdin)) == -1) {
		printf("%d %d %d data=%dHz sep=%dHz(min %dHz %dHz)\n",
		    st->freq_data,
		    st->freq_separator,
		    st->freq_threshold,
		    data_hz,
		    sep_hz,
		    sep_hz_min,
		    sep_hz_max);
		return;
	}

	/*
	 * SAMPLE_OFFSET has type size_t; convert it to long so the
	 * subtraction stays signed and matches the %ld conversion,
	 * even when pos is smaller than the offset.
	 */
	printf("%d %d %d @%ld data=%dHz sep=%dHz(min %dHz %dHz)\n",
	    st->freq_data,
	    st->freq_separator,
	    st->freq_threshold,
	    pos - (long)SAMPLE_OFFSET,
	    data_hz,
	    sep_hz,
	    sep_hz_min,
	    sep_hz_max);
}

/*
 * Discard any partially assembled character and restart
 * at the most significant bit.
 */
static void
reset_char(struct decoder_state *st)
{
	st->ascii = 0;
	st->ascii_bit = 7;
}

/*
 * Print "progname: <formatted msg>: <strerror(errval)>" to stderr
 * and exit with EXIT_FAILURE. Does not return.
 */
static void
err(int errval, const char *msg, ...)
{
	va_list ap;

	fprintf(stderr, "%s: ", progname());

	va_start(ap, msg);
	vfprintf(stderr, msg, ap);
	va_end(ap);

	fprintf(stderr, ": %s\n", strerror(errval));
	exit(EXIT_FAILURE);
}

/*
 * Print the usage line to stderr and exit with EXIT_FAILURE.
 */
static void
usage(void)
{
	fprintf(stderr, "usage: %s [-d]\n", progname());
	exit(EXIT_FAILURE);
}

/*
 * Return the final path component of argv[0], or an empty
 * string when argv[0] is missing or empty.
 */
static const char *
progname(void)
{
	const char *p;

	if (argv0 == NULL || *argv0 == '\0')
		return "";

	p = strrchr(argv0, '/');

	if (p)
		return p + 1;
	else
		return argv0;
}