4.8 Converting Text Keys to Binary Keys

4.8.1 Problem

A user enters a textual representation of a key or other binary data (see Recipe 4.7). You need to convert it to binary.

4.8.2 Solution

Parse out the words, then look them up in the dictionary to reconstruct the actual bits, as shown in the code included in the next section.

4.8.3 Discussion

This function spc_words2bin() uses the wordlist.h file provided on the book's web site, and it can be changed as described in Recipe 4.7.

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "wordlist.h"
   
#define BITS_IN_LIST 11
#define MAX_WORDLEN  4
   
unsigned char *spc_words2bin(unsigned char *str, size_t *outlen) {
  int           cmp, i;
  size_t        bitsinword, curbits, needed, reslen;
  unsigned int  ix, min, max;
  unsigned char *p = str, *r, *res, word[MAX_WORDLEN + 1];
   
  curbits = reslen = *outlen = 0;
  if(!(r = res = (unsigned char *)malloc((strlen(str) + 1) / 2))  
    return 0;
  memset(res, 0, (strlen(str) + 1) / 2);
   
  for (;;) {
    while (isspace(*p)) p++;
    if (!*p) break;
    /* The +1 is because we expect to see a space or a NULL after each and every
     * word; otherwise, there's a syntax error.
     */
    for (i = 0;  i < MAX_WORDLEN + 1;  i++) {
      if (!*p || isspace(*p)) break;
      if (islower(*p)) word[i] = *p++ - ' ';
      else if (isupper(*p)) word[i] = *p++;
      else {
        free(res);
        return 0;
      }
    }
    if (i =  = MAX_WORDLEN + 1) {
      free(res);
      return 0;
    }
    word[i] = 0;
   
    min = 0;
    max = (1 << BITS_IN_LIST) - 1;
    do {
      if (max < min) {
        free(res);
        return 0; /* Word not in list! */
      }
      ix = (max + min) / 2;
      cmp = strcmp(word, words[ix]);
      if (cmp > 0) min = ix + 1;
      else if (cmp < 0) max = ix - 1;
    } while (cmp);
   
    bitsinword = BITS_IN_LIST;
    while (bitsinword) {
      needed = 8 - curbits;
      if (bitsinword <= needed) {
        *r |= (ix << (needed - bitsinword));
        curbits += bitsinword;
        bitsinword = 0;
      } else {
        *r |= (ix >> (bitsinword - needed));
        bitsinword -= needed;
        ix &= ((1 << bitsinword) - 1);
        curbits = 8;
      }
      if (curbits =  = 8) {
        curbits = 0;
        *++r = 0;
        reslen++;
      }
    }
  }
   
  if (curbits && *r) {
     free(res);
     return 0; /* Error, bad format, extra bits! */
  }
  *outlen = reslen;
  return (unsigned char *)realloc(res, reslen);
}

The inputs to the spc_words2bin( ) function are str, which is the English representation of the binary string, and outlen, which is a pointer to how many bytes are in the output. The return value is a binary string of length len. Note that any bits encoded by the English words that don't compose a full byte must be zero, but are otherwise ignored.

You must know a priori how many bytes you expect to get out of this function. For example, 6 words might map to a 56-bit binary string or to a 64-bit binary string (5 words can encode at most 55 bits, and 6 words encodes up to 66 bits).

4.8.4 See Also

Recipe 4.7