/*Program writen by Shannon Thrower
* this version is v1.17 date 01-11-05
* program translates a braille file to print according to
* the translation algorithm developed by Paul Blenkhorn

The rules are read from a text file at a location defiened by RULES_PATH
The input is defined by IN_PATH and the output by OUT_PATH

note that the size of the input line defined by INPUT_SIZE. if this is insufficent
it may cause the program to crash. it would be desirable to checking to make sure
the input line is not bigger than this.

the use of the strlen command returns different values for files edited in window
and linux. this may cause problems when setting a rule's next state field
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/*****************************************************************************************/
/***********************	Constant Macros		******************************************/
#define RULESIZE 40					// maximum length of a rule
#define FOCUSSIZE 10				// maximum length of a rule's focus
#define CONTEXTSIZE 4				// maximum length of a rule's right context
#define OUTSIZE 25					// maximum length of a rule's output
#define NO_RIGHT_CONTEXT "]="		// used to determine when there is no right context
#define OUTPUT_SIZE 200				// max length of a translated word
#define INPUT_SIZE 200				// max length of a braille input word to be translated

// path definitions for linux (operating over Network File System)
#define RULES_PATH "/opt/B2P rules.txt"
#define IN_PATH "/opt/text.brf"
#define OUT_PATH "/opt/output.txt"

// path definitions for windows
//#define RULES_PATH "C:/B2P rules.txt"
//#define IN_PATH "C:/braille input.brf"
//#define OUT_PATH "C:/print output.txt"


typedef struct myRule {
		int input_class;					// structure holds all the information
		char focus[FOCUSSIZE];				// about a particular rule. This struct
		char right_context[CONTEXTSIZE];	// is used in a linked list hence the
		char output[OUTSIZE];				// last component is a link to the next
		int next_state;						// rule in the list.
		struct myRule *next;
	}rule_t;


/*****************************************************************************************/
/*******************	Function Prototypes		******************************************/
int HashFunction(char key);
void ConstructRules(char *location);
void TranslateLine(char braille_line[], char print_line[]);
void PrintRule(rule_t *rule);



/*****************************************************************************************/
/*******************	Global Variables		******************************************/
//stores the head of each linked list where there is one list for each letter and another for all other characters
rule_t *list_array[27];



/*****************************************************************************************/
/*******************	main()		******************************************************/

int main(void)
{
	char output_string[OUTPUT_SIZE];
	char input_string[INPUT_SIZE];

	// read rules from file and generate linked list of data structures
	ConstructRules(RULES_PATH);


	FILE *infile, *outfile;
	infile = fopen(IN_PATH, "r");
	outfile = fopen(OUT_PATH, "w");
	if (infile == NULL){
		printf("**************************************\n* Cannot open the input file.        *\n* The input file should be stored at *\n* %s\n**************************************\n",IN_PATH);
		return(0);
	}else{
		while(fgets(input_string, 1000, infile)!=NULL){ //get a line of input from the input file
			TranslateLine(input_string, output_string); //translate that line
			printf("%s", output_string);
			fprintf(outfile, "%s", output_string);
		}
	}
	fclose(infile);
	fclose(outfile);

	return(0);
}
/*****************************************************************************************/




/*****************************************************************************************/
/*******************	TranslateLine()		**********************************************
this is the function responsible for translating a full line of braille into a
corresponding line of print
*/
void TranslateLine(char braille_line[], char print_line[]){
	//char print_line[OUTPUT_SIZE];
	int current_state = 1, array_index, match_flag=0, focus_length;
	int focus_match, state_match; // used as flags to represent a match occuring
   	int char_ctxt_match; // used as a flag when matching a single character of a rules right context
	int shift_char=0, shift_word=0; //used as flags to capitalise a char or a word
	int n; //used to process each char of a rule's right context
	char right_of_focus[INPUT_SIZE]; //stores the braille input to the right of the current focus
	char current_character = braille_line[0]; //the first char of the braille input
	char focus[FOCUSSIZE], braille_line_copy[INPUT_SIZE];
	char new_out[OUTSIZE]; //temporarily holds a rules output while capitalizing may be done
	rule_t *current_rule;

	int decision_table [6][7]={{1,1,1,0,0,0,0},
							   {1,0,1,0,0,0,0},	//row represents the current state
							   {1,0,0,1,0,0,0},	//column represents the current input class
							   {1,0,0,0,0,0,1},	//rule fires if there is a 1 corresponding to
							   {1,0,0,0,1,0,0},	//current state and input class
							   {0,0,0,0,0,1,0}};

	//initialise variables
	strcpy(braille_line_copy, braille_line);
	strcpy(right_of_focus, "\0");
	strcpy(print_line, "\0");


	while(braille_line[0] !='\0' && braille_line[0] !='\n'){
		match_flag=0;

		/*ASCII char 12 is the form feed or new page char.
		for now just remove it and ignore it*/
		if((int)braille_line[0]==12){
			strcpy(braille_line, &braille_line[1]);
		}
		//determine which linked list needs to be searched
		current_character = braille_line[0];
		array_index = (int)current_character-(int)'A';
		if(array_index<0 || array_index>25){
			array_index = 26;
		}
		//search linked list corresponding to array_index for a match
		current_rule = list_array[array_index];

		while(!match_flag && current_rule != NULL){
			strcpy(focus, current_rule->focus); //ie 	focus = current_rule->focus
			focus_length = strlen(focus);

			/*strnicmp returns 0 if the rule's focus matches the
			braille word (up to the lengh of the focus)*/
			focus_match = strncasecmp(focus, braille_line, focus_length);	/*linux command*/
			//focus_match = strnicmp(focus, braille_line, focus_length);		/*windows command*/

			if (focus_match == 0){
				// the focus has been matched. Now check the state
				// use the rule's state and current input class to check the decision table
				state_match = decision_table[current_state-1][current_rule->input_class-1];
				if (state_match){
					// the state has been matched. now check the right context
					if(current_rule->right_context[0] == '\0'){
						match_flag=1;
					}else{ /* a right context exists for the rule, check the braille input
							* right of the focus to see if a match occurs.*/
						strcpy(right_of_focus, "\0"); //initialise right_of_focus to NULL
						// ie. right_of_focus = braille_line from the end of the current focus
						strcpy(right_of_focus, &braille_line[strlen(current_rule->focus)]);
						/* match each right context character of the rule. if any one of the chars
						   in the right context does not match then rule can not fire */
						char_ctxt_match = 1;

						for (n=0; n<strlen(current_rule->right_context); n++){
							/* wild cards "!"= one or more of the set "&!(A)."
										  " "= anywhite space char (space \t \n...)
										  "~"= one or more roman letters (I, V, M, C)
										  ":"= zero or more punctuation chars
										  "-"= a space char								*/
							int i=0; // used for counting and removing characters
							switch (current_rule->right_context[n]){
								case '!': // one or more of the set "&!(A)."
									if (right_of_focus[0]=='&' || right_of_focus[0]=='!' || right_of_focus[0]=='(' || right_of_focus[0]=='A' || right_of_focus[0]==')'){
										while (right_of_focus[i]=='&' || right_of_focus[i]=='!' || right_of_focus[i]=='(' || right_of_focus[i]=='A' || right_of_focus[i]==')'){
											++i;
										}
										strcpy(right_of_focus, &right_of_focus[i]);
									} else {
										char_ctxt_match = 0;
									}
									break;
								case ' ': // anywhite space char (space \t \n...)
									/* if the frist character to the right of the focus is not a white space character
									   then set the chat_ctxt_match to false */
									if(isspace((int)right_of_focus[0]) || right_of_focus[0]=='\0'){
										// if there is white space or nothing to the right of the focus
										strcpy(right_of_focus, &right_of_focus[1]);
										// then remove the white space char from input to the right of the focus
									}else{
										char_ctxt_match = 0;
									}
									break;
								case '~': //one or more roman letters (I, V, M, C)
									if (right_of_focus[0]!='I' && right_of_focus[0]!='V' && right_of_focus[0]!='M' && right_of_focus[0]!='C'){
										char_ctxt_match = 0;
									}else{//remove the roman letter for the input to the right of the focus
										while (right_of_focus[i]=='I' || right_of_focus[i]=='V' || right_of_focus[i]=='M' || right_of_focus[i]=='C'){
											++i;
										}
										strcpy(right_of_focus, &right_of_focus[i]);
									}
									break;
								case ':': //zero or more punctuation chars
									/* do not change the value of char_ctxt_match since there will always be
									   zero or more. the vale of char_ctxt_match must be preserved.*/

									//if there are punctuation characters remove them from the right of focus
									while ( ((ispunct((int)right_of_focus[i])|| isdigit((int)right_of_focus[i])))
											 && (right_of_focus[i] != '5')
											 && (right_of_focus[i] != ']')
											 && (right_of_focus[i] != '[')){
										++i;
									}
									strcpy(right_of_focus, &right_of_focus[i]);
									break;
								case '_': //a space char
									//set char_ctxt_match to false if the char immediately right of the focus is not a space
									if(right_of_focus[0] != ' '){
										char_ctxt_match = 0;
									}else{//remove the space char from input to the right of the focus
										strcpy(right_of_focus, &right_of_focus[1]);
									}
									break;
								default: // right context may be a particular letter
									if ( current_rule->right_context[n]>='A' && current_rule->right_context[n]<='Z'){
										// char is an alpha and hence requires a literal match
										if ( right_of_focus[0] == current_rule->right_context[n]){
											//right context matches rule for a particular alpha char
											strcpy(right_of_focus, &right_of_focus[1]);
										} else {
											char_ctxt_match = 0;
										}
									} else {
										char_ctxt_match = 0;
									}
							}
						}// end for statement...for each char of rule's right context

						if (char_ctxt_match == 1){
							// the right context has been matched so set the match flag
							match_flag = 1;
						}
					}
				}
			}
			if(!match_flag){
				//if a match has not been found move to the next rule in the linked list
				current_rule = current_rule->next;
			}
		}
		//a rule has been matched OR the end of the linked list has been reached

 		if (match_flag){
			// a match has been found the rule should fire.
			// update the output, the new state and remove the focus from the input braille word

			//update the state
			current_state = current_rule->next_state;

			//copy the rules output to another string so that it can be capitalised etc
			strcpy(new_out, current_rule->output);

			// if the output is not an alpha letter then we are at the end of a
			// word and must set the shift_word flag to false.
			if(!isalpha(current_rule->output[0])){
				shift_word=0;
			}


			//capitalise the first letter or the whole word depending on the flags
			if(shift_char && isalpha(current_rule->output[0])){
				new_out[0]=new_out[0]-32; //subtract 32 from a letter to capitalise it
				shift_char=0; //set flag to false
			} else if (shift_word){
				//capitalise all letters in the output
				int j=0;
				for (j=0; j<strlen(new_out); j++){ //for all letters in the new output
					if(isalpha(new_out[j])){
						new_out[j]=new_out[j]-32; //subtract 32 from a letter to capitalise it
					}
				}
			}


			//set capitalisation flags or append new output to translated line
			if(!strcmp(current_rule->output, "<SHIFT_CHAR>")){
				shift_char=1;
			}else if(!strcmp(current_rule->output, "<SHIFT_WORD>")){
				shift_word=1;
			}else{
				//concatenate the new output to the translated line
				strcpy(print_line, strcat(print_line, new_out));
			}


			//remove the focus from the input braille word
			int i=0;
			for (i=0; i<=strlen(braille_line); i++){
				braille_line[i]=braille_line[focus_length+i];
			}
		}else{
/**/		//commented text may be used for debugging new rules
			//printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			//printf("!!!The translation has exited without a match being found!!!\n");
			//printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			//printf("the input was:%s.\n", braille_line);
			//printf("the state was:%d.\n", current_state);
			//strcpy(print_line, strcat(print_line, "\n\n\n\n\n!!!!!!!!!!!!!!!\n!!!!!!!!!!!!!!!\n\n\n\n\n"));

			/*append ERROR to the output. this may be changed to append the character which
			is causing the error to the output. ie strcat(print_line, braille_line[0])*/
			strcpy(print_line, strcat(print_line, "\n\n\nERROR\n\n\n"));
			//remove the first character from the input
			int i=0;
			for (i=0; i<=strlen(braille_line); i++){
				braille_line[i]=braille_line[i+1];
			}
		}
	}

	//append a new line char to the end of the line.
	strcpy(print_line, strcat(print_line, "\n"));

}

/*****************************************************************************************/





/*****************************************************************************************/
/*******************	ConstructRules()	**********************************************
function reads the file as defined by constant macro RULES_FILE and develops an array of linked lists.
Each array element contains the head of a linked list where one linked lists exists for each letter
of the alphabet (plus another element for non-alpha characters). In this way, when searching for a
rule, only small portion of all rules must be traversed corresponding to the particular letter of interest
*/
void ConstructRules(char *location){
	/*variable declarations*/
	char rule[RULESIZE], tempRule[RULESIZE];
	int array_index;
	rule_t *current_rule, *next_rule;
	char  *pch;
	/*open the rules input file as defined by RULES_PATH so that a linked list of rules can be constructed*/
	FILE *rulesfile;
	rulesfile = fopen(location,"r");
	if (rulesfile == NULL)
		{
			printf("**************************************\n* Cannot open the rules file         *\n* The rules file should be stored at *\n* %s\n**************************************\n",RULES_PATH);
		}
	else
		{
		/*for each rule go through the input file and split the rule into its various components to be stored
		in the rule_t data structure*/
		while(fgets(rule, 100, rulesfile)!=NULL){
			//reserve memeory for the new rule
			current_rule = malloc(sizeof(rule_t));
			//initialise the next rule pointer
			current_rule->next = NULL;
			/*set the input class of the rule*/
			current_rule->input_class = (int)rule[0]-48;//must subtract 48 for the integer typecast

			/*set the focus parameter of the rule*/
			strcpy(tempRule, rule);
			if(strstr(tempRule, "[]]")){
				strcpy(current_rule->focus, "]");
			} else {
				pch = strtok (tempRule,"[");
				pch = strtok (NULL, "]");
				strcpy(current_rule->focus, pch);
			}

			/*set the output field of the rule*/
			strcpy(tempRule, rule);
			if (!strstr(tempRule, "=\t")){
				/* an output exists (some may not have an output they
				are used to change the state of the program*/

				/*this section is all very dodgy. basically it finds if there are
				multiple occurences of the equal sign in a rule and if there is
				it replaces the first occurence with an F. then tokenising can
				be applied as normal.*/
				int count=0, index=0;
				int temp=0;
				for (temp=0; temp<strlen(tempRule); temp++){
					if (tempRule[temp]=='='){
						++count;
						if (count==1){
							index=temp;
						}
					}
				}
				if(count==2){
					tempRule[index]='F';
				}
				//now we can tokenise as normal
				pch = strtok (tempRule,"=");
				pch = strtok (NULL, "\t");

				strcpy(current_rule->output, pch);
			}else{
				// an output does not exist. some rules such as the letter sign do not have an output.
				// These are used to change the state of the program.
				strcpy(current_rule->output, "\0");  // note "\0" is the null character
			}


			/*set the next state field of the rule*/
			current_rule->next_state = (int)rule[strlen(rule)-2]-48;//must subtract 48 for the integer typecast
			/*note: the value subtracted from strlen() may be platform dependent.
		   	  when using a file edited in windows the newline char is not included in this strlen count
			  but the newline char is included in a linux strlen count.
			  for windows this value should be 2 but for linux this file should be 3.*/

			/*if there is no right context the strtok command has some problems hence
			 * we need to check if "]=" occurs in the rule. ie if there is no right context.*/
			strcpy(tempRule, rule);
			if (!strstr(tempRule, NO_RIGHT_CONTEXT)){
				// a right context exists
				pch = strtok (tempRule,"]");
				pch = strtok (NULL, "=");
				strcpy(current_rule->right_context, pch);
			}else{
				// a right context does not exist
				strcpy(current_rule->right_context, "\0");  // note "\0" is the null character
			}


			/*determine the list index to according to the first char of the focus*/
			array_index = HashFunction(current_rule->focus[0]);
			if (list_array[array_index] == NULL){
				//Nothing entered in array yet for this focus
				//Add current record into array
				list_array[array_index] = current_rule;
			}else{ //Array entry contains records, go to the end to add this one
				next_rule = list_array[array_index];
                while (next_rule->next != NULL){
	            	next_rule = next_rule->next;
		        }
				//When we get here nextRecord contains the last entry
				//Now just point it to the new entry
				next_rule->next = current_rule;
			}
		}
		printf("\n");
	}
	fclose(rulesfile);
}
/*****************************************************************************************/


/*****************************************************************************************/
/*******************	HashFunction()	**************************************************
This function determines the index of the hash table list_array[] to which a rule should be applied.
It compares the first letter of the rule's focus to the letter A and typecasts the result to an
integer thus defining the index.
*/
int HashFunction(char key){
	int index = (int)key-(int)'A';
	if(index<0 || index>25){
		index = 26;
	}
	return index;
}
/*****************************************************************************************/


/*****************************************************************************************/
/*******************	PrintRule()	******************************************************
not used in program but may be helpful during debugging*/
void PrintRule(rule_t *rule){
	printf("%d\t[%s]%s=%s\t%d\n", rule->input_class, rule->focus, rule->right_context, rule->output, rule->next_state);
}





