#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// [prev char][following char]
// # occurrences at index
uint64_t result_array[26][26];	// 42 kb

// counts for each char's occurrences
uint64_t prob[26];

// pairs sorted by # occurrences
char     sorted_pairs[26*26][2];

// [cons-cons,cons-vow,vow-cons,vow-vow][idx][char pair]
// = sorted by # occurrences split into which pair it is (vow-cons, cons-vow etc)
char     tbl[4][64][2];

void err(char *s) {
	printf("\n\n===============\nerr: %s\n", s);
	exit(-1);
}

// only take (us) alphabet ... for now
int is_validchar(char x) {
	if (x > 64 && x < 91)
		return 1;

	if (x > 96 && x < 123)
		return 1;

	return 0;
}

// basically tolower
char get_coord(char x) {
	if (x > 96)
		return x-97;

	if (x > 64)
		return x-65;

	err("get_coord: out of range :o");
	return 0;
}

// increment counter for char combination prev-next
void inc_count(char prev, char next) {
	int xcord,ycord;
	xcord = get_coord(prev);
	ycord = get_coord(next);

	if (xcord < 0 || xcord > 25)
		err("inc_count: x out of range");

	if (ycord < 0 || ycord > 25)
		err("inc_count: y out of range");

	result_array[xcord][ycord] += 1;
}

// check charpair, increment
void digest_charpair(char prev, char next) {
	if (!is_validchar(prev))
		return;

	if (!is_validchar(next))
		return;

	inc_count(prev, next);
}

// increment single char occurrences
void digest_char(char curr) {
	int idx;

	if (!is_validchar(curr))
		return;

	idx = get_coord(curr);

	if (idx < 0 || idx > 25)
		err("inc_count: x out of range");

	prob[idx] += 1;
}

// sort:
// print the sorted ones, also fill the sorted_pairs table
void sortres() {
	int xmax=0, ymax=0;
	int i, j;
	uint64_t max, k;
	uint64_t r_a[26][26];

	memcpy(r_a, result_array, 26*26*sizeof(uint64_t));

	printf("Sorting... \n");
	for (k = 0; k < 26*26; k++) {
		xmax = 0;
		ymax = 0;
		max = 0;

		for (i = 0; i < 26; i++) {
			for (j = 0; j < 26; j++) {
				if (r_a[i][j] > max) {
					max = r_a[i][j];
					xmax = i;
					ymax = j;
				}
			}
		}

		if (max == 0)
			break;

		printf("#%5ld: %c%c (%ld)\n", k+1, xmax+97, ymax+97, max);
		r_a[xmax][ymax] = 0;
		sorted_pairs[k][0]=xmax+97;
		sorted_pairs[k][1]=ymax+97;
	}
}

// sorts additionally depending on which combination (vowel-cons, cons-vowel, vow-vow, cons-cons)
void make_table() {
	int idx0=0, idx1=0, idx2=0, idx3=0;
	long i,j;

	for (i = 0; i < 26*26; i++) {
		switch(sorted_pairs[i][0]) {
			case 'a':
			case 'e':
			case 'i':
			case 'o':
			case 'u':
				if(sorted_pairs[i][1]=='a' ||
				sorted_pairs[i][1]=='e' ||
				sorted_pairs[i][1]=='i' ||
				sorted_pairs[i][1]=='o' ||
				sorted_pairs[i][1]=='u') {
					// double vowel
					if (idx3 > 63) break;

					tbl[3][idx3][0]=sorted_pairs[i][0];
					tbl[3][idx3][1]=sorted_pairs[i][1];
					++idx3;
				} else {
					// start with vowel, end consonant
					if (idx2 > 63) break;

					tbl[2][idx2][0]=sorted_pairs[i][0];
					tbl[2][idx2][1]=sorted_pairs[i][1];
					++idx2;
				}
				break;
			default:
				if(sorted_pairs[i][1]=='a' ||
				sorted_pairs[i][1]=='e' ||
				sorted_pairs[i][1]=='i' ||
				sorted_pairs[i][1]=='o' ||
				sorted_pairs[i][1]=='u') {
					// start with consonant, end with vowel
					if (idx1 > 63) break;

					tbl[1][idx1][0]=sorted_pairs[i][0];
					tbl[1][idx1][1]=sorted_pairs[i][1];
					++idx1;

				} else {
					// start cons, end cons
					if (idx0 > 63) break;

					tbl[0][idx0][0]=sorted_pairs[i][0];
					tbl[0][idx0][1]=sorted_pairs[i][1];
					++idx0;
				}
		}
	}

	printf("Got tables:\n");
	for(i = 0; i < 4; i++) {
		printf("[");
		for (j = 0; j < 64; j++) {
			printf("%c%c, ", tbl[i][j][0], tbl[i][j][1]);
		}
		printf("]\n");
	}
}

// get next nbits from data while being at byte byteidx and bit bitidx
int getbits(char *data, size_t len, uint64_t *byteidx, int *bitidx, int nbits) {
	union {
		uint8_t buf8[2];
		uint16_t buf16;
	} buf;
	buf.buf16 = 0;

	if (nbits > 8)
		err("too many bits.");

	if (*byteidx >= len)
		err("overflow");

	buf.buf8[1] = data[*byteidx];

	if (*byteidx + 1 < len) {
		buf.buf8[0] = data[*byteidx + 1];
	}

	if (*bitidx) {
		buf.buf16 = buf.buf16 << *bitidx;
	}

	buf.buf16 = buf.buf16 >> (16 - nbits);

	*bitidx = *bitidx + nbits;
	if (*bitidx > 7) {
		*bitidx = *bitidx % 8;
		*byteidx += 1;
	}

	return (int) buf.buf8[0];
}

// encode data using tables
void enc_data(char *data, size_t len) {
	uint64_t byteidx = 0;
	int bitidx = 0,
	    state = 1, /* start with consonant-vowel */
	    pairidx = 0,
	    neededbits = 0,
	    sylls = 0;

	// states: 0     1       2         3
	// [cons-cons,cons-voc,voc-cons,voc-voc]
	
	while (byteidx < len) {
		if (state != 3)
			neededbits = 6;
		else
			neededbits = 4;

		pairidx = getbits(data, len, &byteidx, &bitidx, neededbits);

		printf("%c%c", tbl[state][pairidx][0], tbl[state][pairidx][1]);

		// 3 "syllables" per word
		++sylls;
		if (!(sylls % 3)) {
			sylls = 0;
			printf(" - ");
		}

		// state update
		/*
		 * state	0	1
		 * 0 kk      2 vk    3 vv
		 * 1 kv      2 vk    0 kk
		 * 2 vk      3 vv    1 kv
		 * 3 vv      0 kk    1 kv
		 * 
		 *
		 */

		if (byteidx >= len)
			break;

		pairidx = getbits(data, len, &byteidx, &bitidx, 1);
		if (state == 3) {
			if (pairidx)
				state = 1;
			else
				state = 0;
		} else if (state == 2) {
			if (pairidx)
				state = 1;
			else
				state = 3;
		} else if (state == 1) {
			if (pairidx)
				state = 0;
			else
				state = 2;
		} else { // state == 0
			if (pairidx)
				state = 3;
			else
				state = 2;
		}
	}

	if (!sylls)
		printf("%d", bitidx);
	else
		printf(" - %d", bitidx);
}


int main() {
	char prev = 0, curr = 0;
	size_t pos = 0;
	int i,j,c_i;

//	char data[] = {223, 34, 38, 237, 81, 224, 186};
	char data[] = {76,123,89,150,111,22,208,119,251,66,25,173,241,226,133,64,65,37,210,167,189,118,77,140,45,165,4,51,21,120,14,97};

	do {
		prev = curr;
		c_i = fgetc(stdin);
		if (c_i == EOF)
			break;
		curr = c_i;

		digest_char(curr);
		digest_charpair(prev, curr);
		++pos;
	} while (1);

	printf("Processed %ld chars.\n", pos);

	printf("Result (rows: first char, cols: 2nd char): \n");

	// header
	printf("\t");
	for (i = 0; i < 26; i++) {
		printf("%c\t", i+97);
	}
	printf("\n");

	for (i = 0; i < 26; i++) {
		printf("%c\t", i+97);
		for (j = 0; j < 26; j++) {
			printf("%ld\t", result_array[i][j]);
		}
		printf("\n");
	}

	printf("\n");

	// header
	printf("\t");
	for (i = 0; i < 26; i++) {
		printf("%c\t", i+97);
	}
	printf("\n\t");
	for (i = 0; i < 26; i++) {
		printf("%ld\t", prob[i]);
	}

	printf("\n");

	sortres();
	printf("\n");

	make_table();
	printf("\n");

	enc_data(data, 32);
	printf("\n");

	exit(0);
}



