1 files changed, 203 insertions, 136 deletions
diff --git a/util/spkmodem_decode/spkmodem-decode.c b/util/spkmodem_decode/spkmodem-decode.c
index 8c57ff9a..3b3b33f8 100644
--- a/util/spkmodem_decode/spkmodem-decode.c
+++ b/util/spkmodem_decode/spkmodem-decode.c
@@ -63,6 +63,7 @@
 #endif
 
 #include <errno.h>
+#include <limits.h>
 #include <stdio.h>
 #include <stdarg.h>
 #include <stdlib.h>
@@ -143,7 +144,7 @@
  * Convert tone frequency ranges into pulse counts within the
  * sliding analysis window.
  *
- * pulse_count ≈ tone_frequency / FRAME_RATE
+ * pulse_count = tone_frequency / FRAME_RATE
  * where FRAME_RATE = SAMPLE_RATE / SAMPLES_PER_FRAME.
  */
 #define FREQ_SEP_MIN ((SEP_TONE_MIN_HZ) / (FRAME_RATE))
@@ -157,15 +158,10 @@
 /*
  * These determine how long the program will wait during
  * tone auto-detection, before shifting to defaults.
- *
- * For tone auto-detection (time waiting for detection)
- * NOTE: you could multiply SAMPLE_PER_FRAME instead
- * of SAMPLE_RATE in LEARN_SAMPLES, for more granularity.
- * Here, 1 * SAMPLE_RATE represents 1 second, which seems
- * like a reasonable, conservative default wait time.
+ * Detection runs once per frame, for the first LEARN_FRAMES frames.
  */
 #define LEARN_SECONDS 1
-#define LEARN_SAMPLES ((LEARN_SECONDS) * (SAMPLE_RATE))
+#define LEARN_FRAMES ((LEARN_SECONDS) * (FRAME_RATE))
 
 /*
  * Sample amplitude threshold used to convert the waveform
@@ -209,35 +205,70 @@ struct decoder_state {
 	int freq_min;
 	int freq_max;
 	int freq_threshold;
-	int learn_samples;
+	int learn_frames;
+
+	/* previous sample used for edge detection */
+	signed short prev_sample;
 };
 
 static const char *argv0;
 
+/*
+ * 16-bit little endian words are read
+ * continuously. We swap their bytes if
+ * the host CPU is big endian.
+ */
 static int host_is_big_endian(void);
+
+/* main loop */
 static void handle_audio(struct decoder_state *st);
-static void collect_separator_tone(struct decoder_state *st);
-static int valid_signal(struct decoder_state *st);
+
+/* separator tone tolerances */
+static void select_separator_tone(struct decoder_state *st);
+static int is_valid_signal(struct decoder_state *st);
+
+/* output to terminal */
+static int set_ascii_bit(struct decoder_state *st);
+static void print_char(struct decoder_state *st);
+static void reset_char(struct decoder_state *st);
+
+/* process samples/frames */
 static void decode_pulse(struct decoder_state *st);
-static void auto_detect_tone(struct decoder_state *st);
-static int silent_signal(struct decoder_state *st);
 static signed short read_sample(struct decoder_state *st);
+static void read_words(struct decoder_state *st);
+
+/* continually adjust tone */
+static void detect_tone(struct decoder_state *st);
+static int silent_signal(struct decoder_state *st);
 static void select_low_tone(struct decoder_state *st);
-static int set_ascii_bit(struct decoder_state *st);
-static void print_char(struct decoder_state *st);
+
+/* debug */
 static void print_stats(struct decoder_state *st);
-static void reset_char(struct decoder_state *st);
 
+/* error handling / usage */
 static void err(int errval, const char *msg, ...);
 static void usage(void);
 static const char *progname(void);
 
+/* portability (old systems) */
 int getopt(int, char * const *, const char *);
 extern char *optarg;
 extern int optind;
 extern int opterr;
 extern int optopt;
 
+#ifndef CHAR_BIT
+#define CHAR_BIT 8
+#endif
+
+typedef char static_assert_char_is_8_bits[(CHAR_BIT == 8) ? 1 : -1];
+typedef char static_assert_char_is_1[(sizeof(char) == 1) ? 1 : -1];
+typedef char static_assert_short[(sizeof(short) == 2) ? 1 : -1];
+typedef char static_assert_int_is_4[(sizeof(int) >= 4) ? 1 : -1];
+typedef char static_assert_twos_complement[
+    ((-1 & 3) == 3) ? 1 : -1
+];
+
 int
 main(int argc, char **argv)
 {
@@ -302,21 +333,26 @@ handle_audio(struct decoder_state *st)
 	int sample;
 
 	/*
-	 * If the modem signal disappears for several frames,
-	 * discard the partially assembled character.
+	 * If the modem signal disappears for several (read: 3)
+	 * frames, discard the partially assembled character.
 	 */
-	if (st->sample_count >= (3 * SAMPLES_PER_FRAME))
+	if (st->sample_count >= (3 * SAMPLES_PER_FRAME) ||
+	    st->freq_separator <= 0)
 		reset_char(st);
 
-	collect_separator_tone(st);
-	decode_pulse(st);
+	st->sample_count = 0;
+
+	/* process exactly one frame */
+	for (sample = 0; sample < SAMPLES_PER_FRAME; sample++)
+		decode_pulse(st);
+
+	select_separator_tone(st);
 
 	if (set_ascii_bit(st) < 0)
 		print_char(st);
 
-	st->sample_count = 0;
-	for (sample = 0; sample < SAMPLES_PER_FRAME; sample++)
-		decode_pulse(st);
+	/* Run tone detection once per frame */
+	detect_tone(st);
 }
 
 /*
@@ -324,14 +360,11 @@ handle_audio(struct decoder_state *st)
  * (and auto-adjust tolerances)
  */
 static void
-collect_separator_tone(struct decoder_state *st)
+select_separator_tone(struct decoder_state *st)
 {
 	int avg;
 
-	if (valid_signal(st))
-		return;
-
-	if (st->sep_samples >= 50 && st->freq_separator <= 0)
+	if (!is_valid_signal(st))
 		return;
 
 	st->sep_sum += st->freq_separator;
@@ -345,6 +378,10 @@ collect_separator_tone(struct decoder_state *st)
 	st->sep_min = avg - SEP_TOLERANCE_PULSES;
 	st->sep_max = avg + SEP_TOLERANCE_PULSES;
 
+	/* reset calibration accumulators */
+	st->sep_sum = 0;
+	st->sep_samples = 0;
+
 	if (st->debug)
 		printf("separator calibrated: %dHz\n",
 		    avg * FRAME_RATE);
@@ -356,10 +393,55 @@ collect_separator_tone(struct decoder_state *st)
  * from being misinterpreted as data.
  */
 static int
-valid_signal(struct decoder_state *st)
+is_valid_signal(struct decoder_state *st)
+{
+	if (st->freq_data <= 0)
+		return 0;
+
+	if (st->freq_separator < st->sep_min ||
+	    st->freq_separator > st->sep_max)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Each validated frame contributes one bit of modem data.
+ * Bits are accumulated MSB-first into the ASCII byte.
+ */
+static int
+set_ascii_bit(struct decoder_state *st)
+{
+	if (st->debug)
+		print_stats(st);
+
+	if (!is_valid_signal(st))
+		return st->ascii_bit;
+
+	if (st->freq_data < st->freq_threshold)
+		st->ascii |= (1 << st->ascii_bit);
+
+	st->ascii_bit--;
+
+	return st->ascii_bit;
+}
+
+static void
+print_char(struct decoder_state *st)
 {
-	return (st->freq_separator > 0 &&
-	    st->freq_data > 0);
+	if (st->debug)
+		printf("<%c,%x>", st->ascii, st->ascii);
+	else
+		putchar(st->ascii);
+
+	reset_char(st);
+}
+
+static void
+reset_char(struct decoder_state *st)
+{
+	st->ascii = 0;
+	st->ascii_bit = 7;
 }
 
 /*
@@ -370,9 +452,11 @@ decode_pulse(struct decoder_state *st)
 {
 	unsigned char old_ring, old_sep;
 	unsigned char new_pulse;
+	signed short sample;
 	int ringpos;
 	int sep_pos;
-	signed short sample;
+	int diff_edge;
+	int diff_amp;
 
 	ringpos = st->ringpos;
 	sep_pos = st->sep_pos;
@@ -408,18 +492,33 @@ decode_pulse(struct decoder_state *st)
 	sample = read_sample(st);
 
 	/*
-	 * Convert the waveform sample into a pulse (0 or 1).
+	 * Avoid a spurious startup edge: prev_sample
+	 * is zero at startup, so the first difference
+	 * could wrongly produce a pulse. (sample_count
+	 * is 0 on the first sample of each frame.)
+	if (st->sample_count == 0)
+		st->prev_sample = sample;
+
+	/*
+	 * Detect edges instead of amplitude.
+	 * This is more tolerant of weak microphones
+	 * and speaker distortion.
 	 *
-	 * The unsigned comparison creates a small dead zone near zero,
-	 * suppressing small amplitude noise from microphones or
-	 * cheap ADCs. Real PC speaker tones are far outside this
-	 * range, so they still produce clean pulses.
+	 * However, we check both slope edges and
+	 * amplitude, to mitigate noise.
 	 */
-	if ((unsigned)(sample + THRESHOLD)
-	    > (unsigned)(2 * THRESHOLD))
+	diff_amp = sample;
+	diff_edge = sample - st->prev_sample;
+	if (diff_edge < 0)
+		diff_edge = -diff_edge;
+	if (diff_amp < 0)
+		diff_amp = -diff_amp;
+	if (diff_edge > THRESHOLD &&
+	    diff_amp > THRESHOLD)
 		new_pulse = 1;
 	else
 		new_pulse = 0;
+	st->prev_sample = sample;
 
 	st->pulse[ringpos] = new_pulse;
 	st->freq_separator += new_pulse;
@@ -429,55 +528,92 @@ decode_pulse(struct decoder_state *st)
 	 * The separator window always stays one frame ahead
 	 * of the data window.
 	 */
-	ringpos++;
-	if (ringpos >= MAX_SAMPLES)
+	if (++ringpos >= MAX_SAMPLES)
 		ringpos = 0;
-	sep_pos++;
-	if (sep_pos >= MAX_SAMPLES)
+	if (++sep_pos >= MAX_SAMPLES)
 		sep_pos = 0;
 
 	st->ringpos = ringpos;
 	st->sep_pos = sep_pos;
 
-	/*
-	 * Attempt to auto-detect spkmodem tone
-	 */
-	auto_detect_tone(st);
-
 	st->sample_count++;
 }
 
+static signed short
+read_sample(struct decoder_state *st)
+{
+	signed short sample;
+	unsigned short u;
+
+	while (st->inpos >= st->inlen)
+		read_words(st);
+
+	sample = st->inbuf[st->inpos++];
+
+	if (st->swap_bytes) {
+		u = (unsigned short)sample;
+		u = (u >> 8) | (u << 8);
+
+		sample = (signed short)u;
+	}
+
+	return sample;
+}
+
+static void
+read_words(struct decoder_state *st)
+{
+	size_t n;
+
+	n = fread(st->inbuf, sizeof(st->inbuf[0]),
+	    READ_BUF, stdin);
+
+	if (n != 0) {
+		st->inpos = 0;
+		st->inlen = n;
+
+		return;
+	}
+
+	if (ferror(stdin))
+		err(errno, "stdin read");
+	if (feof(stdin))
+		exit(EXIT_SUCCESS);
+}
+
 /*
- * Observe signal for LEARN_SAMPLES samples (e.g. 1 second).
- * The exact amount of time is determined by LEARN_SAMPLES
- * divided by SAMPLE_RATE, logically. For example, if
- * LEARN_SAMPLES were half of the SAMPLE_RATE, this
- * corresponds to roughly 500ms before timeout.
- *
- * to guess the correct timing. If it fails,
- * fall back to known good values.
+ * Automatically detect spkmodem tone
  */
 static void
-auto_detect_tone(struct decoder_state *st)
+detect_tone(struct decoder_state *st)
 {
-	if (st->learn_samples >= LEARN_SAMPLES)
+	if (st->learn_frames >= LEARN_FRAMES)
 		return;
 
-	st->learn_samples++;
+	st->learn_frames++;
 
 	if (silent_signal(st))
 		return;
 
 	select_low_tone(st);
 
-	if (st->learn_samples == LEARN_SAMPLES) {
-		st->freq_threshold =
-		    (st->freq_min + st->freq_max) / 2;
+	if (st->learn_frames != LEARN_FRAMES)
+		return;
 
-		if (st->debug)
-			printf("auto threshold: %dHz\n",
-			    st->freq_threshold * FRAME_RATE);
-	}
+	/*
+	 * If the observed frequencies are too close,
+	 * learning likely failed (only one tone seen).
+	 * Keep the default threshold.
+	 */
+	if (st->freq_max - st->freq_min < 2)
+		return;
+
+	st->freq_threshold =
+	    (st->freq_min + st->freq_max) / 2;
+
+	if (st->debug)
+		printf("auto threshold: %dHz\n",
+		    st->freq_threshold * FRAME_RATE);
 }
 
 /*
@@ -517,68 +653,6 @@ select_low_tone(struct decoder_state *st)
 		st->freq_max = f;
 }
 
-static signed short
-read_sample(struct decoder_state *st)
-{
-	size_t n;
-	signed short sample;
-	unsigned short u;
-
-	while (st->inpos >= st->inlen) {
-
-		n = fread(st->inbuf, sizeof(st->inbuf[0]),
-		    READ_BUF, stdin);
-
-		if (n == 0) {
-			if (ferror(stdin))
-				err(errno, "stdin read");
-			if (feof(stdin))
-				exit(EXIT_SUCCESS);
-		}
-
-		st->inpos = 0;
-		st->inlen = n;
-	}
-
-	sample = st->inbuf[st->inpos++];
-
-	if (st->swap_bytes) {
-		u = (unsigned short)sample;
-		u = (u >> 8) | (u << 8);
-
-		sample = (signed short)u;
-	}
-
-	return sample;
-}
-
-/*
- * Each validated frame contributes one bit of modem data.
- * Bits are accumulated MSB-first into the ASCII byte.
- */
-static int
-set_ascii_bit(struct decoder_state *st)
-{
-	if (st->debug)
-		print_stats(st);
-	if (st->freq_data < st->freq_threshold)
-		st->ascii |= (1 << st->ascii_bit);
-
-	st->ascii_bit--;
-	return st->ascii_bit;
-}
-
-static void
-print_char(struct decoder_state *st)
-{
-	if (st->debug)
-		printf("<%c,%x>", st->ascii, st->ascii);
-	else
-		putchar(st->ascii);
-
-	reset_char(st);
-}
-
 static void
 print_stats(struct decoder_state *st)
 {
@@ -613,13 +687,6 @@ print_stats(struct decoder_state *st)
 }
 
 static void
-reset_char(struct decoder_state *st)
-{
-	st->ascii = 0;
-	st->ascii_bit = 7;
-}
-
-static void
 err(int errval, const char *msg, ...)
 {
 	va_list ap;