#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// define nocolors so that output is not ugly on windows
// #define NOCOLORS

// [prev char][following char]
// # occurrences at index
uint64_t result_array[26][26]; // 42 kb

// counts for each char's occurrences
uint64_t prob[26];

// pairs sorted by # occurrences
char sorted_pairs[26 * 26][2];

// [prev char][chars sorted by likelyness 0 high 26 low]
char sorted_result[26][26];

// [cons-cons,cons-vow,vow-cons,vow-vow][idx][char pair]
// = sorted by # occurrences split into which pair it is (vow-cons, cons-vow etc)
char tbl[4][64][2];

void err(char *s) {
  printf("\n\n===============\nerr: %s\n", s);
  exit(-1);
}

void byte2bin(char b) {
  printf("%d%d%d%d%d%d%d%d", (b & 0x80 ? 1 : 0), \
                                (b & 0x40 ? 1 : 0), \
                                (b & 0x20 ? 1 : 0), \
                                (b & 0x10 ? 1 : 0), \
                                (b & 0x08 ? 1 : 0), \
                                (b & 0x04 ? 1 : 0), \
                                (b & 0x02 ? 1 : 0), \
                                (b & 0x01 ? 1 : 0));
}

void any2bin(char *data, size_t len) {
  size_t pos = 0;
  while (pos < len) {
    byte2bin(data[pos]);
    printf(" ");
    ++pos;
  }
}

void pcc(char c) {
#ifndef NOCOLORS
  switch (c) {
    case 'a':
    case 'e':
    case 'i':
    case 'o':
    case 'u':
      printf("\033[1m");
      break;
    default:
      break;
  }
  printf("%c\033[0m", c);
#else
  printf("%c", c);
#endif
}

// only take (us) alphabet ... for now

int is_validchar(char x) {
  if (x > 64 && x < 91)
    return 1;

  if (x > 96 && x < 123)
    return 1;

  return 0;
}

// basically tolower

char get_coord(char x) {
  if (x > 96)
    return x - 97;

  if (x > 64)
    return x - 65;

  err("get_coord: out of range :o");
  return 0;
}

// increment counter for char combination prev-next

void inc_count(char prev, char next) {
  int xcord, ycord;
  xcord = get_coord(prev);
  ycord = get_coord(next);

  if (xcord < 0 || xcord > 25)
    err("inc_count: x out of range");

  if (ycord < 0 || ycord > 25)
    err("inc_count: y out of range");

  result_array[xcord][ycord] += 1;
}

// check charpair, increment

void digest_charpair(char prev, char next) {
  if (!is_validchar(prev))
    return;

  if (!is_validchar(next))
    return;

  inc_count(prev, next);
}

// increment single char occurrences

void digest_char(char curr) {
  int idx;

  if (!is_validchar(curr))
    return;

  idx = get_coord(curr);

  if (idx < 0 || idx > 25)
    err("inc_count: x out of range");

  prob[idx] += 1;
}

// sort:
// print the sorted ones, also fill the sorted_pairs table

void sortres() {
  int xmax = 0, ymax = 0;
  int i, j;
  uint64_t max, k;
  uint64_t r_a[26][26];

  memcpy(r_a, result_array, 26 * 26 * sizeof (uint64_t));

  printf("Sorting... \n");
  for (k = 0; k < 26 * 26; k++) {
    xmax = 0;
    ymax = 0;
    max = 0;

    for (i = 0; i < 26; i++) {
      for (j = 0; j < 26; j++) {
        if (r_a[i][j] > max) {
          max = r_a[i][j];
          xmax = i;
          ymax = j;
        }
      }
    }

    if (max == 0)
      break;

    printf("#%5ld: %c%c (%ld)\n", k + 1, xmax + 97, ymax + 97, max);
    r_a[xmax][ymax] = 0;
    sorted_pairs[k][0] = xmax + 97;
    sorted_pairs[k][1] = ymax + 97;
  }
}

// sorts additionally depending on which combination (vowel-cons, cons-vowel, vow-vow, cons-cons)

void make_table() {
  int idx0 = 0, idx1 = 0, idx2 = 0, idx3 = 0;
  long i, j;

  for (i = 0; i < 26 * 26; i++) {
    switch (sorted_pairs[i][0]) {
      case 'a':
      case 'e':
      case 'i':
      case 'o':
      case 'u':
        if (sorted_pairs[i][1] == 'a' ||
                sorted_pairs[i][1] == 'e' ||
                sorted_pairs[i][1] == 'i' ||
                sorted_pairs[i][1] == 'o' ||
                sorted_pairs[i][1] == 'u') {
          // double vowel
          if (idx3 > 63) break;

          tbl[3][idx3][0] = sorted_pairs[i][0];
          tbl[3][idx3][1] = sorted_pairs[i][1];
          ++idx3;
        } else {
          // start with vowel, end consonant
          if (idx2 > 63) break;

          tbl[2][idx2][0] = sorted_pairs[i][0];
          tbl[2][idx2][1] = sorted_pairs[i][1];
          ++idx2;
        }
        break;
      default:
        if (sorted_pairs[i][1] == 'a' ||
                sorted_pairs[i][1] == 'e' ||
                sorted_pairs[i][1] == 'i' ||
                sorted_pairs[i][1] == 'o' ||
                sorted_pairs[i][1] == 'u') {
          // start with consonant, end with vowel
          if (idx1 > 63) break;

          tbl[1][idx1][0] = sorted_pairs[i][0];
          tbl[1][idx1][1] = sorted_pairs[i][1];
          ++idx1;

        } else {
          // start cons, end cons
          if (idx0 > 63) break;

          tbl[0][idx0][0] = sorted_pairs[i][0];
          tbl[0][idx0][1] = sorted_pairs[i][1];
          ++idx0;
        }
    }
  }

  printf("Got tables:\n");
  for (i = 0; i < 4; i++) {
    printf("[");
    for (j = 0; j < 64; j++) {
      printf("%c%c, ", tbl[i][j][0], tbl[i][j][1]);
    }
    printf("]\n");
  }
}

void make_table2() {
  int i, j, k;
  int ymax = 0;
  char used[26][26] = {
    {0}
  };
  uint64_t max;

  for (i = 0; i < 26; i++) { // iterate alphabet
    for (j = 0; j < 26; j++) { // iterate next-char sorting (jth rank)
      ymax = -1;
      max = 0;

      for (k = 0; k < 26; k++) { // find the highest next unused
        if (result_array[i][k] >= max && !used[i][k]) {

          // if we already have a max candidate AND the score of both is equal
          // prefer letters with higher occurrences
          if (result_array[i][k] == max && ymax >= 0) {
            if (prob[k] < prob[ymax])
              continue;
          }

          max = result_array[i][k];
          ymax = k;
        }
      }

      used[i][ymax] = 1;

      if (max) {
        sorted_result[i][j] = ymax + 97;
      } else {
        sorted_result[i][j] = 0;
      }
    }
  }

  printf("\t");
  for (i = 0; i < 26; i++) {
    printf("%2d  ", i);
  }

  for (i = 0; i < 26; i++) {
    printf("\n%2d %c\t ", i, i + 97);
    for (j = 0; j < 26; j++) {
      pcc(sorted_result[i][j]);
      printf("   ");
    }
  }

}

int nonzentries(char *a) {
  if (a[25])
    return 26;

  return strlen(a);
}

int getgetbits(int x) {
  int cnt = 7;
  int y;

  if (x == 0 || x == 1)
    return 0;

  y = 1 << 7;
  while (!(x & y) && cnt > 0) {
    y = y >> 1;
    --cnt;
  }

  return cnt;
}

// get next nbits from data while being at byte byteidx and bit bitidx

int getbits(char *data, size_t len, uint64_t *byteidx, int *bitidx, int nbits) {

  union {
    uint8_t buf8[2];
    uint16_t buf16;
  } buf;
  buf.buf16 = 0;

  if (nbits > 8)
    err("too many bits.");

  if (*byteidx >= len)
    err("overflow");

  buf.buf8[1] = data[*byteidx];

  if (*byteidx + 1 < len) {
    buf.buf8[0] = data[*byteidx + 1];
  }

  if (*bitidx) {
    buf.buf16 = buf.buf16 << *bitidx;
  }

  buf.buf16 = buf.buf16 >> (16 - nbits);

  //  printf("got %d bits from byte %ld bit %d: ", nbits, *byteidx + 1, *bitidx + 1);
  //  byte2bin(buf.buf8[0]);
  //  printf(" (%d)", buf.buf8[0]);
  //  printf("\n");

  *bitidx = *bitidx + nbits;
  if (*bitidx > 7) {
    *bitidx = *bitidx % 8;
    *byteidx += 1;
  }

  return (int) buf.buf8[0];
}

// encode data using tables

void enc_data(char *data, size_t len) {
  uint64_t byteidx = 0;
  int bitidx = 0,
          state = 1, /* start with consonant-vowel */
          pairidx = 0,
          neededbits = 0,
          sylls = 0;

  // states: 0     1       2         3
  // [cons-cons,cons-voc,voc-cons,voc-voc]

  while (byteidx < len) {
    if (state != 3)
      neededbits = 6;
    else
      neededbits = 4;

    pairidx = getbits(data, len, &byteidx, &bitidx, neededbits);

    printf("%c%c", tbl[state][pairidx][0], tbl[state][pairidx][1]);

    // 3 "syllables" per word
    ++sylls;
    if (!(sylls % 3)) {
      sylls = 0;
      printf(" - ");
    }

    // state update
    /*
     * state	0	1
     * 0 kk      2 vk    3 vv
     * 1 kv      2 vk    0 kk
     * 2 vk      3 vv    1 kv
     * 3 vv      0 kk    1 kv
     * 
     *
     */

    if (byteidx >= len)
      break;

    pairidx = getbits(data, len, &byteidx, &bitidx, 1);
    if (state == 3) {
      if (pairidx)
        state = 1;
      else
        state = 0;
    } else if (state == 2) {
      if (pairidx)
        state = 1;
      else
        state = 3;
    } else if (state == 1) {
      if (pairidx)
        state = 0;
      else
        state = 2;
    } else { // state == 0
      if (pairidx)
        state = 3;
      else
        state = 2;
    }
  }

  if (!sylls)
    printf("%d", bitidx);
  else
    printf(" - %d", bitidx);
}

void enc_data2(char *data, size_t len) {
  /*
   * for first char, get 4 bits
   * 
   * get 4 bits:
   * 0-14: return char at pos 0..14
   * 15: get 3 more bits
   *    0-6: return char at pos 15+(0..6)
   *    7: get 2 more bits
   *      0-2: return char at pos 22+(0..2)
   *      3: get 1 more bit
   *        return char at pos 25+(0..1)
   */

  uint64_t byteidx = 0;
  int bitidx = 0,
          curcharidx,
          nextcharidx,
          wordlen = 0,
          nchoices,
          nbits,
          tmp;

  if (!len)
    err("0 length data");

  // get a first char
  curcharidx = getbits(data, len, &byteidx, &bitidx, 4);
  printf("%c", curcharidx + 97);

  while (byteidx < len) {
    nchoices = nonzentries(sorted_result[curcharidx]);
    nextcharidx = 0;

    if (!nchoices)
      err("some char doesn't have any following chars with probability > 0... maybe pipe more text.");

    do {
      nbits = getgetbits(nchoices);
      nchoices -= 1 << nbits;
      if (nchoices)
        nchoices += 1;

      tmp = getbits(data, len, &byteidx, &bitidx, nbits);
      nextcharidx += tmp;
      if (tmp != (1 << nbits) - 1)
        break;

      if (!nchoices)
        break;
    } while (1);



    //    nextcharidx = getbits(data, len, &byteidx, &bitidx, 4);
    //    if (nextcharidx == 15) {
    //      nextcharidx = getbits(data, len, &byteidx, &bitidx, 3);
    //      if (nextcharidx == 7) {
    //        nextcharidx = getbits(data, len, &byteidx, &bitidx, 2);
    //        if (nextcharidx == 3) {
    //          nextcharidx = getbits(data, len, &byteidx, &bitidx, 1);
    //          nextcharidx += 25;
    //        } else {
    //          nextcharidx += 22;
    //        }
    //      } else {
    //        nextcharidx += 15;
    //      }
    //    }



    printf("%c", sorted_result[curcharidx][nextcharidx]);
    curcharidx = sorted_result[curcharidx][nextcharidx] - 97;
    ++wordlen;
    if (wordlen > 5) {
      wordlen = 0;
      printf(" - ");
    }
  }

  if (wordlen)
    printf(" - %d", bitidx);
  else
    printf("%d", bitidx);
}

int main() {
  char prev = 0, curr = 0;
  size_t pos = 0;
  int i, j, c_i;

  //  char data[] = {0, 0, 0};
  //  int datalen = 3;

  char data[] = {77, 123, 89, 150, 111, 22, 208, 119, 251, 66, 25, 173, 241, 226, 133, 64, 65, 37, 210, 167, 189, 118, 77, 140, 45, 165, 4, 51, 21, 120, 14, 97};
  int datalen = 32;

  do {
    prev = curr;
    c_i = fgetc(stdin);
    if (c_i == EOF)
      break;
    curr = c_i;

    digest_char(curr);
    digest_charpair(prev, curr);
    ++pos;
  } while (1);

  printf("Processed %ld chars.\n", pos);

  printf("Result (rows: first char, cols: 2nd char): \n");

  // header
  printf("\t");
  for (i = 0; i < 26; i++) {
    printf("%c\t", i + 97);
  }
  printf("\n");

  for (i = 0; i < 26; i++) {
    printf("%c\t", i + 97);
    for (j = 0; j < 26; j++) {
      printf("%ld\t", result_array[i][j]);
    }
    printf("\n");
  }

  printf("\n");

  // header
  printf("\t");
  for (i = 0; i < 26; i++) {
    printf("%c\t", i + 97);
  }
  printf("\n\t");
  for (i = 0; i < 26; i++) {
    printf("%ld\t", prob[i]);
  }

  printf("\n");

  //  sortres();
  //  printf("\n");
  //
  //  make_table();
  //  printf("\n");
  //
  //  enc_data(data, 32);
  //  printf("\n");

  printf("\n");
  make_table2();
  printf("\n");

  any2bin(data, datalen);
  printf("\n");

  enc_data2(data, datalen);
  printf("\n");

  exit(0);
}



