/*Program written by Shannon Thrower
* This version v8 date 01-11-05
* program translates a print file to braille according to
* the translation algorithm developed by Paul Blenkhorn

The rules are read from a text file at a location defiened by RULES_PATH
The input is defined by IN_PATH and the output by OUT_PATH

Capital WORD signs ",," will be inserted if the first two letters of a word are upper case irrelevant
of any other letters in the word.

left and right context checking has be nseperated out into functions.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/********************************************************************************************/
/***********************	Constant Macros		*********************************************/
#define RULESIZE 50					// maximum length of a rule
#define FOCUSSIZE 20				// maximum length of a rule's focus
#define CONTEXTSIZE 10				// maximum length of a rule's right context
#define OUTSIZE 25					// maximum length of a rule's output
#define NO_RIGHT_CONTEXT "]="		// used to determine when there is no right context
#define OUTPUT_SIZE 200				// max length of a translated word
#define INPUT_SIZE 200				// max length of a braille input word to be translated

// path definitions for linux (operating over Network File System)
#define RULES_PATH "/opt/P2B rules.txt"
#define IN_PATH "/opt/print input.txt"
#define OUT_PATH "/opt/braille output.brf"

// path definitions for windows
#define RULES_PATH "c:/P2B rules.txt"	// note to just have path as "rules.txt" the file must be stored in folder "lcc"
#define IN_PATH "c:/wind.txt"
#define OUT_PATH "c:/braille output.brf"

typedef struct myRule {
		int input_class;
		char left_context[CONTEXTSIZE];		// structure holds all the information
		char focus[FOCUSSIZE];				// about a particular rule. This struct
		char right_context[CONTEXTSIZE];	// is used in a linked list hence the
		char output[OUTSIZE];				// last component is a link to the next
		int next_state;						// rule in the list.
		struct myRule *next;
	}rule_t;


/********************************************************************************************/
/*******************	Function Prototypes		*********************************************/
int HashFunction(char key);
void ConstructRules(char *location);
void TranslateLine(char print_line[], char braille_line[]);
void PrintRule(rule_t *rule);
int CheckLeftCtxt (char left_ctxt[], char left_of_focus[]);
int CheckRightCtxt (char right_ctxt[], char right_of_focus[]);


/********************************************************************************************/
/*******************	Global Variables		*********************************************/
rule_t *list_array[27]=NULL;//stores the head of each linked list where there is one list for each letter and another for all other characters


/********************************************************************************************/
/*******************	main()		*********************************************************/


int main(void)
{
	rule_t *current_rule;
	char output_string[OUTPUT_SIZE]="\0";
	char input_string[INPUT_SIZE];

	ConstructRules(RULES_PATH);

	FILE *infile, *outfile;
	infile = fopen(IN_PATH, "r");
	outfile = fopen(OUT_PATH, "w");
	if (infile == NULL){
		printf("**************************************\n* Cannot open the input file.        *\n* The input file should be stored at *\n* %s\n**************************************\n",IN_PATH);
		return(0);
	}else{
		while(fgets(input_string, 1000, infile)!=NULL){
			//perform translation for each line of the input file
			TranslateLine(input_string, output_string);
			printf("The output is:%s\n", output_string);
			fprintf(outfile, "%s\n", output_string);
		}
	}
	fclose(infile);
	fclose(outfile);

	return(0);
}
/********************************************************************************************/




/********************************************************************************************/
/*******************	TranslateLine()		*************************************************
this is the function responsible for translating a braille word to a print word
*/
void TranslateLine(char print_line[], char braille_line[]){
	int current_state = 1, array_index, match_flag=0, focus_length;
	int focus_match, state_match,
		lctxt_match, rctxt_match; 		// used as flags to represent a match occuring
	char right_of_focus[INPUT_SIZE],	// stores the braille input to the right of the current focus
		 left_of_focus[INPUT_SIZE]="\0"; 	// stores the braille input to the left of the current focus
	char current_character = braille_line[0];
	char focus[FOCUSSIZE], temp_focus[FOCUSSIZE], print_line_copy[INPUT_SIZE];
	rule_t *current_rule;
	int decision_table [5][6]={{1,1,0,0,0,0},	//row represents the current state
							   {1,0,0,0,0,0},	//column represents the current input class
							   {1,0,1,0,0,0},	//rule fires if there is a 1 corresponding to
							   {1,0,0,1,0,0},	//current state and input class
							   {0,0,0,0,1,0}};
	int new_word=1; //flag used to indicate the beginning of a new word


	//initialise variables
	strcpy(print_line_copy, print_line);
	strcpy(right_of_focus, "\0");
	strcpy(braille_line, "\0");

	//remove misc chars. tabs new lines etc
	for (int i=0; i<strlen(print_line);i++){ //for all char in the input string
		if ((int)print_line[i]<32){			 //delete the chars that are misc ie less than 32
			print_line[i]=' ';//note this line actually converts misc chars to space chars
		}
	}


	while(print_line[0] !='\0' && print_line[0] !='\n'){
		match_flag=0;

		//check if capital signs are needed.
		if(new_word && !isspace(print_line[0])){
			if(isalpha(print_line[0])){
				new_word = 0;
			}
			if(isupper(print_line[0])){
				braille_line = strcat(braille_line, ",");
				if(isupper(print_line[1])){
					braille_line = strcat(braille_line, ",");
				}
			}
		}

		//convert the char to upper case for comparison.
		current_character = toupper((int)print_line[0]);
		array_index = (int)current_character-(int)'A';
		if(array_index<0 || array_index>25){
			array_index = 26;
		}


		//search linked list corresponding to array_index for a match
		current_rule = list_array[array_index];

		while(!match_flag && current_rule != NULL){
			strcpy(focus, current_rule->focus); //ie 	focus = current_rule->focus
			focus_length = strlen(focus);

			//check the focus for a match irrelevant of case. Note that windows and linux use different
			//comands to do this.
			//returns 0 if the rule's focus matches the braille word (up to the lengh of the focus)
			//focus_match = strnicmp(focus, print_line, focus_length);		//windows function
			focus_match = strncasecmp(focus, print_line, focus_length);	//linux function

			if (focus_match == 0){
				// the focus has been matched. Now check the state
				// use the rule's state and current input class to check the decision table
				state_match = decision_table[current_state-1][current_rule->input_class-1];
				if (state_match){
					// the state has been matched. now check the left context
					//strncpy(left_of_focus,print_line, position_of_focus);
					lctxt_match = CheckLeftCtxt(current_rule->left_context, left_of_focus);
					if (lctxt_match){
						// the left context has been matched. now check the right context
						strcpy(right_of_focus, &print_line[strlen(current_rule->focus)]);
						// ie. right_of_focus = braille_line from the end of the current focus
						rctxt_match = CheckRightCtxt(current_rule->right_context, right_of_focus);
						if (rctxt_match){
							match_flag=1;
						}
					}
				}
			}
			if(!match_flag){
				//if a match has not been found move to the next rule in the linked list
				current_rule = current_rule->next;
			}
		}
		//a rule has been matched OR the end of the linked list has been reached

		if (match_flag){
			// a match has been found the rule should fire.
			// update the output, the new state and remove the focus from the input braille word

			//update new word flag
			if(!isalpha((int)current_rule->focus[0])){
				new_word=1;
			}

			//update the state
			if (current_rule->next_state != 99){
				current_state = current_rule->next_state;
			}

			//concatenate the output of the rule to the translated word
			//braille_line = strcat(braille_line, current_rule->output);
			strcpy(braille_line, strcat(braille_line, current_rule->output));

			//remove the focus from the input print line and attach it to the left of the focus
			strcpy(print_line, &print_line[focus_length]);
			strcat(left_of_focus, current_rule->focus);
		}else{
/**/		//commented text may be used for debugging new rules
			//printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			//printf("!!!The translation has exited without a match being found!!!\n");
			//printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
			//printf("the print input was:%s.\n", print_line);
			//printf("The right of focus was:%s.\n", right_of_focus);

			// Print "ERROR" to the output file
			strcpy(braille_line, strcat(braille_line, "\n\n\nERRORe\n\n\n"));
			// remove character from the input
			for (int i=0; i<=strlen(print_line); i++){
				print_line[i]=print_line[i+1];
			}
		}
	}
}


/********************************************************************************************/

/********************************************************************************************/
/*******************	CheckLeftCtxt()	*************************************************
*/
int CheckLeftCtxt (char left_ctxt[], char left_of_focus[]){
	int ctxt_match = 0,
		char_ctxt_match;	// used as a flag when matching a single character of a rules right context;
	int n;					// used to process each char of a rule's right context
	int lof_len;			// length of the left of focus string
	char left_focus_cpy[INPUT_SIZE];	//function manipulates a COPY of the left of focus string since manipulation
										//of the left of focus string would alter the string being passed in as the argument.
	strcpy(left_focus_cpy, left_of_focus);

	//get the length of the left of focus string
	if (strlen(left_focus_cpy) == 0){
		lof_len = 0;
	} else {
		lof_len = strlen(left_focus_cpy)-1;
	}

	if(left_ctxt[strlen(left_ctxt)-1] == '\0'){
		//this rule does not have a right context. hence it is a match
		ctxt_match=1;
	}else{ /* a right context exists for the rule, check the braille input
			* left of the focus to see if a match occurs.*/

		/* match each right context character of the rule. if any one of the chars
		   in the right context does not match then rule can not fire */
		char_ctxt_match = 1;
		for (n=strlen(left_ctxt); n>0; n--){
			/* wild cards "!"= a letter
						  "#"= a number
						  "~"= a space or punctuation
						  " "= a space char only
						  "|"= zero or mote capital signs
						  "`"= one or more roman numerals
		  				  ";"= zero or more letters
		  				  "+"= one or more digits								*/
				int i; //used for looping in some of the case statements
			switch (left_ctxt[n-1]){
				case '!': // a letter
					if(!isalpha((int)left_focus_cpy[lof_len])){
						char_ctxt_match = 0;
					} else {//remove the letter char from input to the right of the focus
						//strcpy(right_of_focus, &right_of_focus[1]);
						left_focus_cpy[lof_len]='\0';
					}
					break;
				case '#': // a number
					if( (left_focus_cpy[lof_len]<48) || (left_focus_cpy[lof_len]>57) ){
						// note these are the asci values of 0 and 9. ie if right_of_focus
						// is not a number then set the match to false.
						char_ctxt_match = 0;
					} else {//remove the white space char from input to the right of the focus
						left_focus_cpy[lof_len]='\0';
					}
					break;
				case '~': // a space or punct char
					/* if the frist character to the right of the focus is not a white space character
					 or a punctuation char then set the chat_ctxt_match to false */
					if(isspace((int)left_focus_cpy[lof_len]) || ispunct((int)left_focus_cpy[lof_len]) || (left_focus_cpy[lof_len] == '\0')){
						//remove the space/punct char from the input to the right of the focus
						left_focus_cpy[lof_len]='\0';
					} else {
						char_ctxt_match = 0;
					}
					break;
				case ' ': // a space only
					/* if the frist character to the right of the focus is not a white space character
					 then set the chat_ctxt_match to false */
					if(isspace((int)left_focus_cpy[lof_len]) || (left_focus_cpy[lof_len]=='\0')){
						//remove the white space char from input to the right of the focus
						left_focus_cpy[lof_len]='\0';
					} else {
						char_ctxt_match = 0;
					}
					break;
				case '|': //zero or more capital signs
					if(left_focus_cpy[lof_len] == ',' ){
						left_focus_cpy[lof_len]='\0';
					} else {
						char_ctxt_match = 0;
					}
					break;
				case '`': //one or more roman letters (I, V, M, C)
					if (left_focus_cpy[lof_len]!='I' && left_focus_cpy[lof_len]!='V' && left_focus_cpy[lof_len]!='M' && left_focus_cpy[lof_len]!='C'){
						char_ctxt_match = 0;
					} else {//remove the roman letter for the input to the right of the focus
						int i=lof_len;
						while (left_focus_cpy[i]=='I' || left_focus_cpy[i]=='V' || left_focus_cpy[i]=='M' || left_focus_cpy[i]=='C'){
							left_focus_cpy[i]='\0';
							--i;
						}
					}
					break;
				case ';': //zero or more letters
					/* do not change the value of char_ctxt_match since there will always be
					   zero or more. the vale of char_ctxt_match must be preserved.*/
					//if there are punctuation characters remove them from the right of focus
					i=lof_len;
					while (isalpha((int)left_focus_cpy[i])){//is ture if char is an alpha/letter char.
						left_focus_cpy[i]='\0';
						--i;
					}
					//strcpy(right_of_focus, &right_of_focus[i]);
					break;
				case '+': //one or more digits
					/* note there must be at least one digit. */
					if(!isdigit((int)left_focus_cpy[lof_len])){
						char_ctxt_match = 0;
					} else {//remove the space/punct char from the input to the right of the focus
						int i=lof_len;
						while (isalpha((int)left_focus_cpy[i])){//is ture if char is a punctuation char.
							left_focus_cpy[i]='\0';
							--i;
						}
					}
					break;
				default:
					//left context character is not a wild card. it must be tested as it is. ie the ctxt
					//char must be the next char in the left of focus string.
					if ( toupper((int)left_focus_cpy[lof_len]) == left_ctxt[n-1] ){
						//left context matches rule for a particular char. then remove it.
						left_focus_cpy[lof_len]='\0';
					} else {
						char_ctxt_match = 0;
					}
				}
			}// end FOR statement...for each char of rule's right context

			if (char_ctxt_match == 1){
				ctxt_match = 1;
			}else{
				ctxt_match = 0;

			}
		}
	return ctxt_match;
}
/********************************************************************************************/



/********************************************************************************************/
/*******************	CheckRightCtxt()	*************************************************
*/
int CheckRightCtxt (char right_ctxt[], char right_of_focus[]){
	int ctxt_match = 0,
		char_ctxt_match;	// used as a flag when matching a single character of a rules right context;
	int n;					// used to process each char of a rule's right context

	if(right_ctxt[0] == '\0'){
		//this rule does not have a right context. hence it is a match
		ctxt_match=1;
	}else{ /* a right context exists for the rule, check the braille input
			* right of the focus to see if a match occurs.*/

		/* match each right context character of the rule. if any one of the chars
		   in the right context does not match then rule can not fire */
		char_ctxt_match = 1;
		for (n=0; n<strlen(right_ctxt); n++){
			/* wild cards "!"= a letter
						  "#"= a number
						  "~"= a space or punctuation
						  " "= a space char only
						  "`"= one or more roman numerals
		  				  ";"= zero or more letters
		  				  "+"= one or more digits				*/

			int i; //used for looping in some of the case statements
			switch (right_ctxt[n]){
				case '!': // a letter
					if(!isalpha((int)right_of_focus[0])){
						char_ctxt_match = 0;
					} else {//remove the letter char from input to the right of the focus
						strcpy(right_of_focus, &right_of_focus[1]);
					}
					break;
				case '#': // a number
					if( (right_of_focus[0]<48) || (right_of_focus[0]>57) ){
						// note these are the asci values of 0 and 9. ie if right_of_focus
						// is not a number then set the match to false.
						char_ctxt_match = 0;
					} else {//remove the white space char from input to the right of the focus
						strcpy(right_of_focus, &right_of_focus[1]);
					}
					break;
				case '~': // a space or punct char
					/* if the frist character to the right of the focus is not a white space character
					 or a punctuation char then set the chat_ctxt_match to false */
					if(isspace((int)right_of_focus[0]) || ispunct((int)right_of_focus[0]) || (right_of_focus[0] == '\0')){
						//remove the space/punct char from the input to the right of the focus
						strcpy(right_of_focus, &right_of_focus[1]);
					} else {
						char_ctxt_match = 0;
					}
					break;
				case ' ': // a space only
					/* if the frist character to the right of the focus is not a white space character
					 then set the chat_ctxt_match to false */
					if(isspace((int)right_of_focus[0]) || (right_of_focus[0]=='\0')){
						//remove the white space char from input to the right of the focus
						strcpy(right_of_focus, &right_of_focus[1]);
					} else {
						char_ctxt_match = 0;
					}
					break;
				case '`': //one or more roman letters (I, V, M, C)
					if (right_of_focus[0]!='I' && right_of_focus[0]!='V' && right_of_focus[0]!='M' && right_of_focus[0]!='C'){
						char_ctxt_match = 0;
					} else {//remove the roman letter for the input to the right of the focus
						i=0;
						while (right_of_focus[i]=='I' || right_of_focus[i]=='V' || right_of_focus[i]=='M' || right_of_focus[i]=='C'){
							++i;
						}
						strcpy(right_of_focus, &right_of_focus[i]);
					}
					break;
				case ';': //zero or more letters
					/* do not change the value of char_ctxt_match since there will always be
					   zero or more. the vale of char_ctxt_match must be preserved.*/
					//if there are punctuation characters remove them from the right of focus
					i=0;
					while (isalpha((int)right_of_focus[i])){//is ture if char is a punctuation char.
						++i;
					}
					strcpy(right_of_focus, &right_of_focus[i]);
					break;
				case '+': //one or more digits
					/* note there must be at least one digit. */
					if(!isdigit((int)right_of_focus[0])){
						char_ctxt_match = 0;
					} else {//remove the space/punct char from the input to the right of the focus
						int i=0;
						while (isalpha((int)right_of_focus[i])){//is ture if char is a punctuation char.
							++i;
						}
						strcpy(right_of_focus, &right_of_focus[i]);
					}
					break;
				default:
					if ( toupper((int)right_of_focus[0]) == right_ctxt[n] ){
						//right context matches rule for a particular char. then remove it.
						strcpy(right_of_focus, &right_of_focus[1]);
					} else {
						char_ctxt_match = 0;
					}
				}
			}// end for statement...for each char of rule's right context

			if (char_ctxt_match == 1){
				ctxt_match = 1;
			}else{
				ctxt_match = 0;

			}
		}
	return ctxt_match;
}
/********************************************************************************************/




/********************************************************************************************/
/*******************	ConstructRules()	*************************************************
function reads the file as defined by constant macro RULES_FILE and develops an array of linked lists.
Each array element contains the head of a linked list where one linked lists exists for each letter
of the alphabet (plus another element for non-alpha characters). In this way, when searching for a
rule, only small portion of all rules must be traversed corresponding to the particular letter of interest
*/
void ConstructRules(char *location){
	/*variable declarations*/
	char rule[RULESIZE], tempRule[RULESIZE];
	int array_index;
	rule_t *current_rule, *last_rule=NULL, *next_rule;
	char  *pch;
	/*open the rules input file as defined by RULES_PATH so that a linked list of rules can be constructed*/
	FILE *rulesfile;
	rulesfile = fopen(location,"r");
	if (rulesfile == NULL)
		{
			printf("**************************************\n* Cannot open the rules file         *\n* The rules file should be stored at *\n* %s\n**************************************\n",RULES_PATH);
		}
	else
		{
		/*for each rule go through the input file and split the rule into its various components to be stored
		in the rule_t data structure*/
		while(fgets(rule, 1000, rulesfile)!=NULL){
			current_rule = malloc(sizeof(rule_t));
			current_rule->next = NULL;
			/*set the input class of the rule*/
			current_rule->input_class = (int)rule[0]-48;//must subtract 48 for the integer typecast
			/*set the focus parameter of the rule*/
			strcpy(tempRule, rule);
			if(strstr(tempRule, "[]]")){
				strcpy(current_rule->focus, "]");
			} else {
				pch = strtok (tempRule,"[");
				pch = strtok (NULL, "]");
				strcpy(current_rule->focus, pch);
			}

			/*set the output field of the rule*/
			/* the tokenising of the output is complex due to some rule using multiple equal signs
			hence tokensing at an equal sign creates ambiguity as to which of the multiple equal signs
			to tokenise from. the majority of rules that contain multiple equal signs are those with an
			equal sign in the focus and those translation "for" to the equal sign and hence have an equal
			sign in the output. */
			strcpy(tempRule, rule);
			// check if there are multiple equal signs in the rule
			int counta=0;
			for (int temp=0; temp<strlen(tempRule); temp++){
				if (tempRule[temp]=='='){
					++counta;
				}
			}
			if(counta==1){
				/* there is only one equal sign in the rule. this should be the majority of rules. the only
				   complication occurs in some rules that may have no output. these rules are used only to
				   change the state of program. eg the number sign. */
				if (!strstr(tempRule, "=\t")){
					// an output exists
					pch = strtok (tempRule,"=");
					pch = strtok (NULL, "\t");
					strcpy(current_rule->output, pch);
				} else {
					// an output does not exist
					strcpy(current_rule->output, "\0");  // note "\0" is the null character
				}
			} else {
				/*the rule contains multiple equal signs.
				remove anything before the first equal sign AFTER the end of the focus ie after ']' */
				int index=0;
				//move index to the end of the focus
				while (tempRule[index] !=']'){
					++index;
				}
				//move index to the fist equal sign
				while (tempRule[index] !='='){
					++index;
				}
				//remove anything before this index
				strcpy(tempRule, &tempRule[index+1]);
				pch = strtok (tempRule, "\t");
				strcpy(current_rule->output, pch);
			}


			/*set the next state field of the rule*/
			if (rule[strlen(rule)-2] == '-'){ 	//if the rule has a next state of - then set the next state field
				current_rule->next_state = 99;  //to 99. then when updating state do not change if the next state is 99
			}else{
				current_rule->next_state = (int)rule[strlen(rule)-2]-48;//must subtract 48 for the integer typecast
			}


			/*if there is no right context the strtok command has some problems hence
			 *some we need to check if "]=" occurs in the rule. ie if there is no right context.*/
			strcpy(tempRule, rule);
			if (!strstr(tempRule, NO_RIGHT_CONTEXT)){
				// a right context exists
				pch = strtok (rule,"]");
				pch = strtok (NULL, "=");
				strcpy(current_rule->right_context, pch);
			}else{
				// a right context does not exist
				strcpy(current_rule->right_context, "\0");  // note "\0" is the null character
			}


			/*set the left context field of the rule*/
			strcpy(tempRule, rule);
			if (!strstr(tempRule, "\t[")){
				pch = strtok (rule,"\t");
				pch = strtok (NULL, "[");
				strcpy(current_rule->left_context, pch);
			}else{
				// a left context does not exist
				strcpy(current_rule->left_context, "\0");
			}


			/*determine the list index to according to the first char of the focus*/
			array_index = HashFunction(current_rule->focus[0]);
			if (list_array[array_index] == NULL){
				//Nothing entered in array yet for this focus
				//Add current record into array
				list_array[array_index] = current_rule;
			}else{ //Array entry contains records, go to the end to add this one
				next_rule = list_array[array_index];
                while (next_rule->next != NULL){
	            	next_rule = next_rule->next;
		        }
				//When we get here nextRecord contains the last entry
				//Now just point it to the new entry
				next_rule->next = current_rule;
			}
		}
	}
	fclose(rulesfile);
}
/********************************************************************************************/


/********************************************************************************************/
/*******************	HashFunction()	*****************************************************
This function determines the index of the hash table list_array[] to which a rule should be applied.
It compares the first letter of the rule's focus to the letter A and typecasts the result to an
integer thus defining the index.
*/
int HashFunction(char key){
	int index = (int)key-(int)'A';
	if(index<0 || index>25){
		index = 26;
	}
	return index;
}
/********************************************************************************************/

//print rule function may be useful in debugging
void PrintRule(rule_t *rule){
	printf("%d\t%s[%s]%s=%s\t%d\n", rule->input_class, rule->left_context, rule->focus, rule->right_context, rule->output, rule->next_state);
}