// Prerelease version, 8/23/05 // Author: Jim Battle // This program reads a .WAV file and attempts to decode the data stream // off that recording. The audio format must be in RIFF WAV format, // non-compressed, one or two channels. It allows for mono/stereo input, // any input sampling rate (lower sampling rates increase the decoding // error rate), and 8b or 16b samples. // The Datapoint 2200 records data bits using a FSK method, namely one // half cycle at frequency X means a one bit and one full cycle at // frequency 2X means a zero bit. This is stated this way because // the samples processed here are off a tape recorder running at 1 7/8 ips // while the datapoint runs its tape at 7.5 ips, a factor of four times // faster. To be specific, at this reduced speed, a one bit is a half cycle // of 481.25 Hz and a zero bit is a full cycle of 962.5 Hz. The original // 7.5 ips frequencies are 1925 Hz and 3850 Hz. The preamble while the // tape is coming up to speed is a train of 1 bits. // // The structure of the data on a tape is as follows: // // (1) bytes are eight bits // // (2) a sync pattern of 010 brackets each byte, so for example // two bytes would be (sync)(byte0)(sync)(byte1)(sync). thus // the character rate is 350 cps at 7.5 ips. // // (3) in the forward direction, bits of a byte are recorded // msb first, lsb last. // // (4) a record is a sequence of bytes with valid sync patterns // followed by an all 1's byte and an invalid sync (of 111) // // (5) the inter-record gap time is about 280 ms, or 98 character times // // (6) trains of one bits are recorded before and after a record to // allow a PLL to track the signal and to form the end of record // marker at both sides of a byte of 1 bits and a sync of 111. // There isn't a whole lot of high-powered thinking going on here. // It is just a lot of guess work on my part as to what seems to work // and what doesn't. Start simple and add complexity as required. 
//
// The program is structured to operate on a rolling window through
// the file, rather than doing the easier thing of sucking in the
// whole file and then operating on it.  This is because the files
// can last a few minutes, which would consume a prohibitive amount
// of memory.

// NOTE(review): the header names on the #include lines were missing from
// the text under review; the set below is what the code in this file
// uses -- confirm against the original source.
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>     // for varargs
#include <string.h>
#include <assert.h>
#include <stdbool.h>    // bool, true, false
#include <stdint.h>     // exact-width integer types

// define as "1" on little endian architectures, otherwise define as "0"
// FIXME: do this automatically
#undef  LITTLE_ENDIAN   // some platform headers predefine this name
#define LITTLE_ENDIAN 1

// support dumping records as intel hex files
#define SUPPORT_HEX 1

// at least this many "1" bits must be seen before a sync pattern in order
// to be taken seriously.  this is required because as the tape speed slows
// at the end of a record, sometimes false syncs are detected.
#define SYNC_THRESHOLD 32

// ========================================================================
// type definitions

// exact-width types: the WAV header structs are filled directly by fread(),
// so each field must be exactly as wide as its name says on every platform
// (plain "long" is 64 bits on LP64 systems).
typedef uint32_t uint32;
typedef int32_t  int32;
typedef uint16_t uint16;
typedef int16_t  int16;
typedef uint8_t  uint8;
typedef int8_t   int8;

typedef int16 sample_t;

// NB: these evaluate their arguments more than once
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
#define ABS(a)   (((a)<(0))?(-(a)):(a))

// ========================================================================
// global variables

// command line options
int   opt_v;        // verbosity level
bool  opt_x;        // explode mode
int   opt_x_num;    // chunk number
char *opt_ofn;      // output filename
char *opt_ifn;      // input filename
enum { OFMT_TAP=0, OFMT_BIN, OFMT_HEX } opt_ofmt;

// .wav file attributes
bool   inmono;              // input file is mono (1) or stereo (0)
int    sample_bytes;        // number of bytes per sample
uint32 expected_samples;    // number of samples in file
uint32 sample_rate;         // samples/second
FILE  *fIn;                 // input audio file handle

// period of a zero bit, in Hz, at 1 7/8 ips
const float zero_freq = 962.5f;

// pll lock range = nominal +/- 25%
const float lock_range = 0.25f;
float min_samples_per_bit;
float max_samples_per_bit;

float samples_per_bit;  // bit period, in samples
float pll_period;       // current PLL estimate of bit period

// ========================================================================
// filtered reporting

// common back end of tfprintf() and tprintf(): format the message onto
// 'fp', but only if 'verbosity' does not exceed the -v level in opt_v.
// the text is formatted directly onto the stream, so there is no limit
// on the length of a message.
static void vreport(int verbosity, FILE *fp, const char *fmt, va_list args)
{
    if (verbosity <= opt_v) {
        vfprintf(fp, fmt, args);
    }
}

// printf-style message to 'fp', emitted only when verbosity <= opt_v
void tfprintf(int verbosity, FILE *fp, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vreport(verbosity, fp, fmt, args);
    va_end(args);
}

// printf-style message to stdout, emitted only when verbosity <= opt_v
void tprintf(int verbosity, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vreport(verbosity, stdout, fmt, args);
    va_end(args);
}

// ========================================================================
// WAV file parsing

// RIFF WAV file format
//  __________________________
// | RIFF WAVE Chunk          |
// |   groupID  = 'RIFF'      |
// |   riffType = 'WAVE'      |
// |    __________________    |
// |   | Format Chunk     |   |
// |   |   ckID = 'fmt '  |   |
// |   |__________________|   |
// |    __________________    |
// |   | Sound Data Chunk |   |
// |   |   ckID = 'data'  |   |
// |   |__________________|   |
// |__________________________|
//
// although it is legal to have more than one data chunk, this
// program assumes there is only one.
const uint32 RiffID = ('R' << 0) | ('I' << 8) | ('F' << 16) | ('F' << 24); const uint32 WaveID = ('W' << 0) | ('A' << 8) | ('V' << 16) | ('E' << 24); typedef struct { uint32 groupID; // should be 'RIFF' uint32 riffBytes; // number of bytes in file after this header uint32 riffType; // should be 'WAVE' } RIFF_t; const uint32 FmtID = ('f' << 0) | ('m' << 8) | ('t' << 16) | (' ' << 24); typedef struct { uint32 chunkID; // should be 'fmt ' int32 chunkSize; // not including first 8 bytes of header int16 FormatTag; // 1=uncompressed uint16 Channels; // number of audio channels uint32 Frequency; // sample frequency uint32 AvgBPS; // we'll ignore this uint16 BlockAlign; // we'll ignore this uint16 BitsPerSample; } FormatChunk_t; const uint32 DataID = ('d' << 0) | ('a' << 8) | ('t' << 16) | ('a' << 24); typedef struct { uint32 chunkID; // must be 'data' int32 chunkSize; // not including first 8 bytes of header // unsigned char data[]; // everything that follows } DataChunk_t; #define SWAP16(v) ( ((((uint16)(v)) & 0x00FF) << 8) | \ ((((uint16)(v)) & 0xFFFF) >> 8) ) #define SWAP32(v) ( ((((uint32)(v)) & 0x000000FF) << 24) | \ ((((uint32)(v)) & 0x0000FF00) << 8) | \ ((((uint32)(v)) & 0x00FF0000) >> 8) | \ ((((uint32)(v)) & 0xFF000000) >> 24) ) // make sure the file just opened is a WAV file, and if so, // read some of the critical parameters void CheckHeader(void) { RIFF_t RiffHdr; FormatChunk_t FormatHdr; DataChunk_t DataHdr; if (fread(&RiffHdr, sizeof(RiffHdr), 1, fIn) != 1) { fprintf(stderr, "Error: file didn't contain a RIFF header\n"); exit(-1); } #if !LITTLE_ENDIAN // do endian swap RiffHdr.groupID = (uint32)SWAP32(RiffHdr.groupID); RiffHdr.riffBytes = (uint32)SWAP32(RiffHdr.riffBytes); RiffHdr.riffType = (uint32)SWAP32(RiffHdr.riffType); #endif if ((RiffHdr.groupID != RiffID) || (RiffHdr.riffType != WaveID)) { fprintf(stderr, "Error: input file not a WAV file\n"); exit(-1); } if (fread(&FormatHdr, sizeof(FormatHdr), 1, fIn) != 1) { fprintf(stderr, "Error: file didn't 
contain a format block\n"); exit(-1); } #if !LITTLE_ENDIAN // do endian swap FormatHdr.chunkID = (uint32)SWAP32(FormatHdr.chunkID); FormatHdr.chunkSize = ( int32)SWAP32(FormatHdr.chunkSize); FormatHdr.FormatTag = ( int16)SWAP16(FormatHdr.FormatTag); FormatHdr.Channels = (uint16)SWAP16(FormatHdr.Channels); FormatHdr.Frequency = (uint32)SWAP32(FormatHdr.Frequency); FormatHdr.AvgBPS = (uint32)SWAP32(FormatHdr.AvgBPS); FormatHdr.BlockAlign = (uint16)SWAP16(FormatHdr.BlockAlign); FormatHdr.BitsPerSample = (uint16)SWAP16(FormatHdr.BitsPerSample); #endif if ((FormatHdr.chunkID != FmtID) || (FormatHdr.chunkSize != sizeof(FormatHdr)-8)) { fprintf(stderr, "Error: I can't deal with this type of WAV file\n"); exit(-1); } if (FormatHdr.BitsPerSample == 8) sample_bytes = 1; else if (FormatHdr.BitsPerSample == 16) sample_bytes = 2; else { fprintf(stderr, "Error: samples must be either 8b or 16b\n"); exit(-1); } if (FormatHdr.FormatTag != 1) { fprintf(stderr, "Error: can't deal with compressed data\n"); exit(-1); } if (FormatHdr.Channels != 1 && FormatHdr.Channels != 2) { fprintf(stderr, "Error: can't handle too many channels\n"); exit(-1); } inmono = (FormatHdr.Channels == 1); sample_rate = FormatHdr.Frequency; if (FormatHdr.Frequency < 11000) { fprintf(stderr, "Warning: the sample rate is low -- it might hurt conversion\n"); exit(-1); } if (fread(&DataHdr, sizeof(DataHdr), 1, fIn) != 1) { fprintf(stderr, "Error: file didn't contain a DATA header\n"); exit(-1); } if (DataHdr.chunkID != DataID) { fprintf(stderr, "Error: I can't deal with this type of WAV file (2)\n"); exit(-1); } // compute dependent parameters expected_samples = DataHdr.chunkSize / (sample_bytes * FormatHdr.Channels); // period, in samples samples_per_bit = (float)sample_rate / zero_freq; // pll lock range = nominal +/- 25% pll_period = samples_per_bit; // start at nominal min_samples_per_bit = samples_per_bit * (1.0f - lock_range); max_samples_per_bit = samples_per_bit * (1.0f + lock_range); float sec = 
(float)expected_samples/sample_rate; tprintf(1, "File: '%s'\n", opt_ifn); tprintf(1, "WAV format: %d samples/sec, %db %s\n", sample_rate, 8*sample_bytes, inmono ? "mono" : "stereo"); tprintf(1, "Expected # of samples: %ld", expected_samples); tprintf(1, " (%.2f seconds)\n",sec); } // return a mono sample from the input file. // if the input file is in stereo, it averages the two channels. // this routine reads blocks for efficiency, but doles out one sample // per request. sample_t GetMonoSample(void) { sample_t newsamp; if (inmono && (sample_bytes == 1)) { // mono 8b int8 b0 = (int8)fgetc(fIn) - 128; newsamp = (sample_t)(b0 << 8); } else if (inmono && (sample_bytes == 2)) { uint8 b0 = (uint8)fgetc(fIn); int8 b1 = (int8)fgetc(fIn); newsamp = (sample_t)((b1<<8) + b0); } else if (!inmono && (sample_bytes == 1)) { // 8b stereo int8 b0 = fgetc(fIn) - 128; int8 b1 = fgetc(fIn) - 128; newsamp = (sample_t)((b0+b1) << 7); } else if (!inmono && (sample_bytes == 2)) { // 16b stereo uint8 b0 = (uint8)fgetc(fIn); int8 b1 = (int8)fgetc(fIn); uint8 b2 = (uint8)fgetc(fIn); int8 b3 = (int8)fgetc(fIn); sample_t left = (sample_t)((b1<<8) + b0); sample_t right = (sample_t)((b3<<8) + b2); newsamp = (left+right+1)>>1; } return newsamp; } // ========================================================================= // we maintain in memory a WORKBUFSIZE window of samples. // the tricky part is that the start of the buffer might be anywhere // in the buffer, and all other addressing is modulo the buffer size. // // if any access is made to the last BUMP samples, we roll the window // over by QTRWORKBUF samples. 
// number of samples we are holding in memory #define WORKBUFSIZE 16384 // make this a power of two #define WORKBUFMASK (WORKBUFSIZE-1) // for modulo wrapping #define HALFWORKBUF (WORKBUFSIZE/2) #define QTRWORKBUF (WORKBUFSIZE/4) #define BUMP 1024 int32 windowstart; // the oldest sample in the buffer int32 windowoffset; // where in the window the oldest sample lives // this holds the samples we are working on sample_t inbuf[WORKBUFSIZE]; int sample_addr(int32 n) { #if 1 if (n < windowstart || n >= windowstart+WORKBUFSIZE) { fprintf(stderr, "Error: sample %d, workbuf[] access out of range\n", n); exit(-1); } #endif // see if we are bumping into the end of the buffer if (n >= windowstart + WORKBUFSIZE - BUMP) { // move the input buffer window forward 'n' samples. // we do this through modulo address arithmetic; // we don't actually move the samples around. // adjust pointers windowstart += QTRWORKBUF; windowoffset = (QTRWORKBUF + windowoffset) & WORKBUFMASK; // fill up newly exposed portion of buffer int off = (windowoffset + 3*QTRWORKBUF) & WORKBUFMASK; for(int32 t=off; t> 0) & 0xFF), obuff_fp ); fputc( ((v >> 8) & 0xFF), obuff_fp ); fputc( ((v >> 16) & 0xFF), obuff_fp ); fputc( ((v >> 24) & 0xFF), obuff_fp ); } void StreamEnd(void) { int len = strlen(opt_ofn) + 3; // +3 for safety char *ofn = (char *)malloc(len); assert(ofn != NULL); int n; // when to open the file bool open_it = opt_x || // explode requested (opt_ofmt != OFMT_TAP) || // bin or hex (opt_x_num == 0); // first // when to close the file bool close_it = opt_x || // explode requested (opt_ofmt != OFMT_TAP); // bin or hex if (opt_x) { sprintf(ofn, opt_ofn, opt_x_num); // serialize filename } else { strcpy(ofn, opt_ofn); } opt_x_num++; if (open_it) { // open a file obuff_fp = fopen(ofn, "wb"); if (obuff_fp == NULL) { fprintf(stderr, "Error: couldn't open file '%s'\n", ofn); exit(-1); } } switch (opt_ofmt) { case OFMT_BIN: // binary for(n=0; n> 0) + (n >> 8) + (n >> 16) + (n >> 24) + 0x00; for(int nn=0; nn 
0) StreamEnd(); } void StreamDone(void) { if (obuff_fp != NULL) fclose(obuff_fp); } // ========================================================================= // bitstream decoder #define BH_SIZE (128) // size of bithistory buffer #define BH_MASK (BH_SIZE-1) enum { BS_LOST=0, BS_PREAMBLE, // in a train of 1 bits BS_PREAMBLE_0, // train of 1s followed by 0 BS_PREAMBLE_01, // train of 1s followed by 0,1 BS_BYTE, // decoding byte stream BS_GAP }; int BSstate = BS_LOST; int BSbyteCount; void Bit(uint32 time, int bit) { static int count; static int bits; static int bithistory[BH_SIZE]; static int bh_put = 0; static int bh_get = 0; static int bh_vld = 0; tprintf(3, "sample %d: decoded bit %d\n", time, bit); bithistory[bh_put] = bit; bh_put = (bh_put + 1) & BH_MASK; bh_vld = MIN(bh_vld+1, BH_SIZE); switch (BSstate) { case BS_LOST: if (bit == 1) { BSstate = BS_PREAMBLE; count = 1; } break; case BS_PREAMBLE: if (bit == 1) { count++; } else if ((bit == 0) && (count > SYNC_THRESHOLD)) BSstate = BS_PREAMBLE_0; else BSstate = BS_LOST; break; case BS_PREAMBLE_0: if (bit == 1) { BSstate = BS_PREAMBLE_01; } else { tprintf(2, "sample %d: preamble lost after %d bits\n", time, count+2); BSstate = BS_LOST; } break; case BS_PREAMBLE_01: if (bit == 0) { tprintf(1, "sample %d: preamble and sync after %d bits\n", time, count+3); BSstate = BS_BYTE; count = 0; BSbyteCount = 0; bits = 0x00; StreamStart(); } else { tprintf(2, "sample %d: preamble lost after %d bit %d\n", time, count+3); BSstate = BS_PREAMBLE; count = 2; } break; case BS_BYTE: bits = (bits << 1) | (bit > 0); count++; if (count == 11) { bits &= 0x7FF; if ((bits & 7) == 2) { // ... 
010 sync code bits = (bits >> 3) & 0xFF; // in fwd direction, data comes off the tape lsb first bits = ((bits & 0x01) << 7) | ((bits & 0x02) << 5) | ((bits & 0x04) << 3) | ((bits & 0x08) << 1) | ((bits & 0x10) >> 1) | ((bits & 0x20) >> 3) | ((bits & 0x40) >> 5) | ((bits & 0x80) >> 7); tprintf(2, "sample %d: byte 0x%02X (%03o)\n", time, bits, bits); StreamByte(bits); count = 0; BSbyteCount++; } else if (bits == 0x7FF) { tprintf(1, "sample %d: hit valid gap after %d bytes, PLL period=%f\n", time, BSbyteCount, pll_period); StreamEnd(); count = 0; BSstate = BS_GAP; } else { if (BSbyteCount == 0) { tprintf(1, "sample %d: bad sync code %03X; apparently it was not a valid sync\n", time, bits & 7); } else { tprintf(1, "sample %d: bad sync code %03X\n", time, bits & 7); } StreamError(0); BSstate = BS_LOST; } } break; case BS_GAP: count++; if (bit == 2) { tprintf(2, "sample %d: skipped %d bits to mid-gap\n", time, count); BSstate = BS_LOST; } break; default: assert(0); break; } } // ========================================================================= // bit decoder // sequentially the stream of transitions and turn them into a stream of // bits. of course, this routine must guard against illegal transitions // that are bound to come up. int ClassifyTransition(uint32 time, uint32 duration) { if (duration < 0.25f * pll_period) return -1; // too short if (duration < 0.75f * pll_period) return 0; // a half "zero" bit if (duration < 1.50f * pll_period) return 1; // a "one" bit return 2; // too long } // update the phase lock loop // we want the phase detector to lock in on the right phase without // being too responsive nor too slow. this is just a guess. // it is the amount the pll phase is adjusted at each bit cell time // as a fraction of the error between expected and actual. 
const float pll_bump = 0.15f; void PLL(int duration) { const float phaseDiff = duration - pll_period; pll_period += phaseDiff * pll_bump; pll_period = MAX(min_samples_per_bit, pll_period); pll_period = MIN(max_samples_per_bit, pll_period); tprintf(4, "pll period = %f\n", pll_period); } void DecodeBits(uint32 time) { enum { DB_INIT=0, DB_HALF_BIT, DB_READY }; static DBstate = DB_INIT; static int prevTime = 0; // end of previous bit static int halfTime = 0; // end of half bit int type; switch (DBstate) { case DB_INIT: prevTime = time; DBstate = DB_READY; break; case DB_HALF_BIT: type = ClassifyTransition(time, time-halfTime); switch (type) { case -1: if ((BSstate == BS_BYTE) && (BSbyteCount > 0)) tprintf(1, "sample %d: Warning: runt pulse when short pulse expected\n", time); Bit(time, -1); // completed second short period PLL(time-prevTime); // correct for phase error DBstate = DB_READY; break; case 0: Bit(time, 0); // completed second short period PLL(time-prevTime); // correct for phase error DBstate = DB_READY; break; case 2: if ((BSstate == BS_BYTE) && (BSbyteCount > 0)) tprintf(1, "sample %d: Warning: really long pulse\n", time); Bit(time, 2); PLL(time-prevTime); // correct for phase error prevTime = time; DBstate = DB_READY; break; case 1: if ((BSstate == BS_BYTE) && (BSbyteCount > 0)) tprintf(1, "sample %d: long pulse when short pulse expected\n", time); Bit(time, 1); PLL(time-prevTime); // correct for phase error prevTime = time; DBstate = DB_READY; break; default: assert(0); break; } prevTime = time; break; case DB_READY: type = ClassifyTransition(time, time-prevTime); switch (type) { case -1: if ((BSstate == BS_BYTE) && (BSbyteCount > 0)) tprintf(1, "sample %d: Warning: runt pulse\n", time); Bit(time, -1); halfTime = time; DBstate = DB_READY; break; case 0: halfTime = time; DBstate = DB_HALF_BIT; break; case 2: if (BSstate == BS_BYTE) tprintf(1, "sample %d: Warning: long pulse\n", time); Bit(time, 2); PLL(time-prevTime); // correct for phase error prevTime 
= time; break; case 1: Bit(time, 1); PLL(time-prevTime); // correct for phase error prevTime = time; break; default: assert(0); break; } break; default: assert(0); break; } } // ========================================================================= // find the duration of each flux zone #if 1 // detect zero crossings directly void FindTransitions() { bool bFirst = true; // first transition seen sample_t prevSamp; // previous sample prevSamp = GETIN(0); for(uint32 nSamp=1; nSamp prev_max) { prev_max = samp; prev_max_t = nSamp; bUp = true; } else if (samp < prev_min) { prev_min = samp; prev_min_t = nSamp; bUp = false; } else { // look ahead a few samples to see if we just found the local min/max bool atpeak = true; for(int n=1; n<5; n++) { sample_t s = GETIN(nSamp + n); if ((s > prev_max) || (s < prev_min)) { atpeak = false; break; } } if (atpeak && (prev_max - prev_min > 2000)) { uint32 intercept = (prev_max_t + prev_min_t + 1) >> 1; if (bUp) { printf("sample %d: detected max peak at %d\n", nSamp, prev_max); prev_min = prev_max; } else { printf("sample %d: detected min peak at %d\n", nSamp, prev_min); prev_max = prev_min; } DecodeBits(intercept); } } } StreamDone(); } #endif // ========================================================================= // main void usage(int code) { FILE *f = (code == 0) ? 
stdout : stderr; fprintf(f, "Usage: dpwav2tap [-v [#]] [-x] [-o .tag] .wav\n"); fprintf(f, "-v is report verbosity level\n"); fprintf(f, " with no -v, reporting is at a minimum;\n"); fprintf(f, " with no # specified, 1 is assumed;\n"); fprintf(f, " -v 2 through -v 4 provide increasing detail.\n"); fprintf(f, "-o specifies a specific output filename;\n"); fprintf(f, " by default it is .tap\n"); fprintf(f, "-x says to \"explode\" each record into a separate file,\n"); fprintf(f, " which are -###.tap\n"); fprintf(f, "Version: August 24, 2005\n"); exit(code); #if 0 // detailed -v information 0 = just produce the output file and errors 1 = errors, warnings, length of each decoded block 2 = and each byte decoded 3 = and each bit decoded 4 = and PLL value #endif } // parse the command line arguments void ParseArgs(int argc, char **argv) { // set command line defaults opt_v = 0; opt_x = false; opt_x_num = 0; opt_ofn = NULL; opt_ofmt = OFMT_TAP; #if 1 if (argc < 2) // we need at least one parameter usage(-1); #endif for(int i=1; i= 4) && (stricmp(&opt_ifn[len-4], ".wav") == 0)) { // input filename ends in ".wav"; replace it with .tap strcpy(&opt_ofn[len-3], "tap"); } else { // input filename doesn't end like we'd expect opt_ofn = strcat(opt_ifn, ".tap"); } } // if -x is in effect, insert "-%03d" just before .tap // in order to generate serialized filenames. if (opt_x) { int len = strlen(opt_ofn); char *tmp = (char*)malloc(len+5); assert(tmp != NULL); strcpy(tmp, opt_ofn); if (stricmp(&opt_ofn[len-4], ".tap") == 0) strcpy(&tmp[len-4], "-%03d.tap"); else if (stricmp(&opt_ofn[len-4], ".bin") == 0) strcpy(&tmp[len-4], "-%03d.bin"); #if SUPPORT_HEX else if (stricmp(&opt_ofn[len-4], ".hex") == 0) strcpy(&tmp[len-4], "-%03d.hex"); #endif else strcpy(&tmp[len], "-%03d"); free(opt_ofn); opt_ofn = tmp; } { int len = strlen(opt_ofn); opt_ofmt = (stricmp(&opt_ofn[len-4], ".bin") == 0) ? OFMT_BIN #if SUPPORT_HEX : (stricmp(&opt_ofn[len-4], ".hex") == 0) ? 
OFMT_HEX #endif : OFMT_TAP; } #if 0 // report how command line was parsed printf("opt_v = %d\n", opt_v); printf("opt_x = %d\n", (int)opt_x); printf("opt_ofn = '%s'\n", opt_ofn); printf("opt_ifn = '%s'\n", opt_ifn); exit(0); #endif } int main(int argc, char **argv) { // parse command line arguments ParseArgs(argc, argv); fIn = fopen(opt_ifn, "rb"); if (fIn == NULL) { fprintf(stderr, "Error: couldn't open file '%s'\n", opt_ifn); exit(-1); } // make sure the wav file is OK CheckHeader(); // initialize our work buffer // NB: the whole inbuffer complication is plumbing for a more // sophisticated version of this program that looks forward // and back in the sample stream. FillInbuffer(); // process the file FindTransitions(); fclose(fIn); return 0; }