﻿// Rebuilds precomposed Vietnamese letters from text.
// The input is first passed through decompose(), then scanned left to right: at each position the
// longest sequence found in the composition table (three code units first, then two) is replaced
// by its precomposed letter, and anything else is copied through unchanged.
// chars: the text to compose
// Returns the composed string; the same string is also written to the element with id 'output'
// when that element exists.
function compose (chars) {
	// Declared locally: decompose() runs its own counter loop, and undeclared names would be shared
	// globals (and a ReferenceError in strict-mode or module code).
	var dchars = decompose(chars);
	var outstr = '';
	var i, field;

	for (i = 0; i < dchars.length; i++) {
		if (i < dchars.length - 2 && composition[dchars.slice(i, i + 3)]) {
			// base letter + vowel-quality mark + tone mark
			outstr += composition[dchars.slice(i, i + 3)];
			i += 2; // skip the two marks just consumed
		}
		else if (i < dchars.length - 1 && composition[dchars.slice(i, i + 2)]) {
			// base letter + a single mark
			outstr += composition[dchars.slice(i, i + 2)];
			i++; // skip the mark just consumed
		}
		else { outstr += dchars.charAt(i); }
	}

	// compose() is also called purely for its return value, so a missing output field must not throw
	field = document.getElementById('output');
	if (field) { field.value = outstr; }
	return outstr;
}

// Splits precomposed Vietnamese letters into a base letter followed by combining marks.
// Each character found in the decomposition table is replaced by its table entry; every other
// character is copied through unchanged.
// chars: the text to decompose
// Returns the decomposed string.
function decompose (chars) {
	// note that this is not exactly NFD because we want the accent to always be last (to make matching easier later)
	var outstr = '';
	var i, ch; // local, so callers that run their own loops are not disturbed

	for (i = 0; i < chars.length; i++) {
		ch = chars.charAt(i);
		outstr += decomposition[ch] ? decomposition[ch] : ch;
	}

	// Reorder so that the tone mark is last: any tone mark (grave, acute, tilde, hook above,
	// dot below) directly followed by a circumflex, breve or horn swaps places with it.
	// The g flag matters: without it only the first such pair in the whole text was reordered.
	outstr = outstr.replace(/([\u0300\u0301\u0303\u0309\u0323])([\u0302\u0306\u031B])/g, '$2$1');

	return outstr;
}

// Lookup table read by decompose(): each key is a single precomposed Vietnamese letter and each
// value is its base letter followed by combining mark(s). Where a letter carries two marks, the
// circumflex (U+0302), breve (U+0306) or horn (U+031B) comes first and the tone mark
// (U+0300, U+0301, U+0303, U+0309 or U+0323) comes last.
// Rows are grouped by base letter (A, E, I, O, U, Y); the last row holds the letters that carry
// only a circumflex, breve or horn.
var decomposition = { 
'Á':'A\u0301', 'á':'a\u0301', 'À':'A\u0300', 'à':'a\u0300', 'Ã':'A\u0303', 'ã':'a\u0303', 'Ạ':'A\u0323', 'ạ':'a\u0323', 'Ả':'A\u0309', 'ả':'a\u0309', 
'Ấ':'A\u0302\u0301', 'ấ':'a\u0302\u0301', 'Ầ':'A\u0302\u0300', 'ầ':'a\u0302\u0300', 'Ẩ':'A\u0302\u0309', 'ẩ':'a\u0302\u0309', 'Ẫ':'A\u0302\u0303', 'ẫ':'a\u0302\u0303', 'Ậ':'A\u0302\u0323', 'ậ':'a\u0302\u0323', 
'Ắ':'A\u0306\u0301', 'ắ':'a\u0306\u0301', 'Ằ':'A\u0306\u0300', 'ằ':'a\u0306\u0300', 'Ẳ':'A\u0306\u0309', 'ẳ':'a\u0306\u0309', 'Ẵ':'A\u0306\u0303', 'ẵ':'a\u0306\u0303', 'Ặ':'A\u0306\u0323', 'ặ':'a\u0306\u0323', 

'È':'E\u0300', 'è':'e\u0300', 'É':'E\u0301', 'é':'e\u0301', 'Ẽ':'E\u0303', 'ẽ':'e\u0303', 'Ẹ':'E\u0323', 'ẹ':'e\u0323', 'Ẻ':'E\u0309', 'ẻ':'e\u0309', 
'Ế':'E\u0302\u0301', 'ế':'e\u0302\u0301', 'Ề':'E\u0302\u0300', 'ề':'e\u0302\u0300', 'Ể':'E\u0302\u0309', 'ể':'e\u0302\u0309', 'Ễ':'E\u0302\u0303', 'ễ':'e\u0302\u0303', 'Ệ':'E\u0302\u0323', 'ệ':'e\u0302\u0323', 

'Ì':'I\u0300', 'ì':'i\u0300', 'Í':'I\u0301', 'í':'i\u0301', 'Ĩ':'I\u0303', 'ĩ':'i\u0303', 'Ị':'I\u0323', 'ị':'i\u0323', 'Ỉ':'I\u0309', 'ỉ':'i\u0309', 

'Ò':'O\u0300', 'ò':'o\u0300', 'Ó':'O\u0301', 'ó':'o\u0301', 'Õ':'O\u0303', 'õ':'o\u0303', 'Ọ':'O\u0323', 'ọ':'o\u0323', 'Ỏ':'O\u0309', 'ỏ':'o\u0309', 
'Ồ':'O\u0302\u0300', 'ồ':'o\u0302\u0300', 'Ố':'O\u0302\u0301', 'ố':'o\u0302\u0301', 'Ỗ':'O\u0302\u0303', 'ỗ':'o\u0302\u0303', 'Ộ':'O\u0302\u0323', 'ộ':'o\u0302\u0323', 'Ổ':'O\u0302\u0309', 'ổ':'o\u0302\u0309', 
'Ờ':'O\u031B\u0300', 'ờ':'o\u031B\u0300', 'Ớ':'O\u031B\u0301', 'ớ':'o\u031B\u0301', 'Ỡ':'O\u031B\u0303', 'ỡ':'o\u031B\u0303', 'Ợ':'O\u031B\u0323', 'ợ':'o\u031B\u0323', 'Ở':'O\u031B\u0309', 'ở':'o\u031B\u0309', 

'Ù':'U\u0300', 'ù':'u\u0300', 'Ú':'U\u0301', 'ú':'u\u0301', 'Ũ':'U\u0303', 'ũ':'u\u0303', 'Ụ':'U\u0323', 'ụ':'u\u0323', 'Ủ':'U\u0309', 'ủ':'u\u0309', 
'Ừ':'U\u031B\u0300', 'ừ':'u\u031B\u0300', 'Ứ':'U\u031B\u0301', 'ứ':'u\u031B\u0301', 'Ữ':'U\u031B\u0303', 'ữ':'u\u031B\u0303', 'Ự':'U\u031B\u0323', 'ự':'u\u031B\u0323', 'Ử':'U\u031B\u0309', 'ử':'u\u031B\u0309', 

'Ỳ':'Y\u0300', 'ỳ':'y\u0300', 'Ý':'Y\u0301', 'ý':'y\u0301', 'Ỹ':'Y\u0303', 'ỹ':'y\u0303', 'Ỵ':'Y\u0323', 'ỵ':'y\u0323', 'Ỷ':'Y\u0309', 'ỷ':'y\u0309', 

'Ê':'E\u0302', 'ê':'e\u0302', 'Â':'A\u0302', 'â':'a\u0302', 'Ă':'A\u0306', 'ă':'a\u0306', 'Ô':'O\u0302', 'ô':'o\u0302', 'Ơ':'O\u031B', 'ơ':'o\u031B', 'Ư':'U\u031B', 'ư':'u\u031B', 

// the pilcrow maps to itself; NOTE(review): presumably only a terminator so that every real row can end with a comma - confirm
'¶':'¶'
};

// Lookup table read by compose(): each key is a base letter followed by one or two combining
// marks and each value is the matching precomposed Vietnamese letter. In two-mark keys the
// circumflex (U+0302), breve (U+0306) or horn (U+031B) comes first and the tone mark last,
// which is the order decompose() produces.
// Rows are grouped by base letter (A, E, I, O, U, Y); the last row holds the letters that carry
// only a circumflex, breve or horn.
var composition = { 
'A\u0301':'Á', 'a\u0301':'á', 'A\u0300':'À', 'a\u0300':'à', 'A\u0303':'Ã', 'a\u0303':'ã', 'A\u0323':'Ạ', 'a\u0323':'ạ', 'A\u0309':'Ả', 'a\u0309':'ả', 
'A\u0302\u0301':'Ấ', 'a\u0302\u0301':'ấ', 'A\u0302\u0300':'Ầ', 'a\u0302\u0300':'ầ', 'A\u0302\u0309':'Ẩ', 'a\u0302\u0309':'ẩ', 'A\u0302\u0303':'Ẫ', 'a\u0302\u0303':'ẫ', 'A\u0302\u0323':'Ậ', 'a\u0302\u0323':'ậ', 
'A\u0306\u0301':'Ắ', 'a\u0306\u0301':'ắ', 'A\u0306\u0300':'Ằ', 'a\u0306\u0300':'ằ', 'A\u0306\u0309':'Ẳ', 'a\u0306\u0309':'ẳ', 'A\u0306\u0303':'Ẵ', 'a\u0306\u0303':'ẵ', 'A\u0306\u0323':'Ặ', 'a\u0306\u0323':'ặ', 

'E\u0300':'È', 'e\u0300':'è', 'E\u0301':'É', 'e\u0301':'é', 'E\u0303':'Ẽ', 'e\u0303':'ẽ', 'E\u0323':'Ẹ', 'e\u0323':'ẹ', 'E\u0309':'Ẻ', 'e\u0309':'ẻ', 
'E\u0302\u0301':'Ế', 'e\u0302\u0301':'ế', 'E\u0302\u0300':'Ề', 'e\u0302\u0300':'ề', 'E\u0302\u0309':'Ể', 'e\u0302\u0309':'ể', 'E\u0302\u0303':'Ễ', 'e\u0302\u0303':'ễ', 'E\u0302\u0323':'Ệ', 'e\u0302\u0323':'ệ', 

'I\u0300':'Ì', 'i\u0300':'ì', 'I\u0301':'Í', 'i\u0301':'í', 'I\u0303':'Ĩ', 'i\u0303':'ĩ', 'I\u0323':'Ị', 'i\u0323':'ị', 'I\u0309':'Ỉ', 'i\u0309':'ỉ', 

'O\u0300':'Ò', 'o\u0300':'ò', 'O\u0301':'Ó', 'o\u0301':'ó', 'O\u0303':'Õ', 'o\u0303':'õ', 'O\u0323':'Ọ', 'o\u0323':'ọ', 'O\u0309':'Ỏ', 'o\u0309':'ỏ', 
'O\u0302\u0300':'Ồ', 'o\u0302\u0300':'ồ', 'O\u0302\u0301':'Ố', 'o\u0302\u0301':'ố', 'O\u0302\u0303':'Ỗ', 'o\u0302\u0303':'ỗ', 'O\u0302\u0323':'Ộ', 'o\u0302\u0323':'ộ', 'O\u0302\u0309':'Ổ', 'o\u0302\u0309':'ổ', 
'O\u031B\u0300':'Ờ', 'o\u031B\u0300':'ờ', 'O\u031B\u0301':'Ớ', 'o\u031B\u0301':'ớ', 'O\u031B\u0303':'Ỡ', 'o\u031B\u0303':'ỡ', 'O\u031B\u0323':'Ợ', 'o\u031B\u0323':'ợ', 'O\u031B\u0309':'Ở', 'o\u031B\u0309':'ở', 

'U\u0300':'Ù', 'u\u0300':'ù', 'U\u0301':'Ú', 'u\u0301':'ú', 'U\u0303':'Ũ', 'u\u0303':'ũ', 'U\u0323':'Ụ', 'u\u0323':'ụ', 'U\u0309':'Ủ', 'u\u0309':'ủ', 
'U\u031B\u0300':'Ừ', 'u\u031B\u0300':'ừ', 'U\u031B\u0301':'Ứ', 'u\u031B\u0301':'ứ', 'U\u031B\u0303':'Ữ', 'u\u031B\u0303':'ữ', 'U\u031B\u0323':'Ự', 'u\u031B\u0323':'ự', 'U\u031B\u0309':'Ử', 'u\u031B\u0309':'ử', 

'Y\u0300':'Ỳ', 'y\u0300':'ỳ', 'Y\u0301':'Ý', 'y\u0301':'ý', 'Y\u0303':'Ỹ', 'y\u0303':'ỹ', 'Y\u0323':'Ỵ', 'y\u0323':'ỵ', 'Y\u0309':'Ỷ', 'y\u0309':'ỷ', 

'E\u0302':'Ê', 'e\u0302':'ê', 'A\u0302':'Â', 'a\u0302':'â', 'A\u0306':'Ă', 'a\u0306':'ă', 'O\u0302':'Ô', 'o\u0302':'ô', 'O\u031B':'Ơ', 'o\u031B':'ơ', 'U\u031B':'Ư', 'u\u031B':'ư', 

// the pilcrow maps to itself; NOTE(review): presumably only a terminator so that every real row can end with a comma - confirm
'¶':'¶'
};

// Transcribes Vietnamese text to IPA at a phonological level.
// The text is trimmed of leading/trailing spaces, composed, split on single spaces into syllables,
// and each syllable is cut into initial consonant, vowel cluster and final consonant, which are
// mapped separately and joined again. Each transcribed syllable is followed by one space.
// node: points to the textarea containing the text (only its .value is read)
// region: 'n' selects the northern variants; any other value selects the southern ones
// Returns the transcription; it is also written to the element with id 'transcribe', which is
// made visible.
function transcribeToIPA (node, region) {
	// Letters treated as vowels. Written with escapes so the set does not depend on how this file
	// happens to be normalized.
	var vowelSet = 'aeiouy' +
		'\u0103\u00E2\u00EA\u00F4\u01A1\u01B0' +              // a-breve, a/e/o-circumflex, o-horn, u-horn
		'\u00E0\u00E1\u00E3\u00E8\u00E9\u00EC\u00ED\u0129' +  // toned a, e, i
		'\u00F2\u00F3\u00F5\u00F9\u00FA\u00FB\u0169\u00FD' +  // toned o, u (u-tilde was missing before), y
		'\u1EA0-\u1EF9';                                      // every remaining precomposed letter in U+1EA0..U+1EF9
	var consonantRuns = new RegExp('[^' + vowelSet + ']+', 'g');
	var vowelRuns = new RegExp('[' + vowelSet + ']+', 'g');
	var startsWithVowel = new RegExp('^[' + vowelSet + ']');
	var toneMark = /[\u0300\u0301\u0303\u0309\u0323]/;
	// last vowel letter is a form of o, o-circumflex or u
	var endsInOorU = /[ou\u00F2\u00F3\u00F5\u1ECF\u1ECD\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u1ED9\u00F9\u00FA\u0169\u1EE7\u1EE5]/;
	// last vowel letter is a plain a with or without a tone
	var endsInA = /[a\u00E1\u00E0\u00E3\u1EA3\u1EA1]/;
	// combining mark written over the vowel -> what replaces the tone placeholder in the IPA vowel
	var toneToIPA = {
		'\u0300': '\u0300',
		'\u0301': '\u0301',
		'\u0303': '\u0301ˀ',
		'\u0309': '\u030C',
		'\u0323': '\u0300ˀ'
	};

	var temp = node.value;
	temp = temp.replace(/^ */, ''); // remove leading and trailing spaces
	temp = temp.replace(/ *$/, '');
	temp = compose(temp);
	var syllables = temp.split(' '); //split into syllables
	var outstr = '';

	// All working variables are local: as undeclared names they were shared globals, which let a
	// vowel from one syllable leak into the next and is an error in strict-mode or module code.
	var a, syl, k, vowels, initialC, finalC, C1, C2, V, decomposedV, tone, lastVowel;

	for (a = 0; a < syllables.length; a++) {
		syl = syllables[a].toLowerCase();
		// 'gi' is treated as a single initial consonant
		syl = syl.replace(/gi/, 'ʒ');
		// runs of consonants (and of any other character that is not a vowel)
		k = syl.match(consonantRuns);
		// figure out whether there's an initial and final consonant
		initialC = ''; finalC = ''; C1 = ''; C2 = '';
		if (k) {
			if (startsWithVowel.test(syl)) { finalC = k[0]; }
			else {
				initialC = k[0];
				if (k.length > 1) { finalC = k[1]; }
			}
		}
		// find vowels
		vowels = syl.match(vowelRuns);

		switch (initialC) {
			case 'c': C1 = 'k'; break;
			case 'ch': C1 = 'c'; break;
			case 'd': C1 = (region === 'n') ? 'z' : 'j'; break;
			case 'đ': C1 = 'd'; break;
			case 'g': C1 = 'ɣ'; break;
			case 'ʒ': C1 = (region === 'n') ? 'z' : 'ʒ'; break;
			case 'gh': C1 = 'ɣ'; break;
			case 'kh': C1 = 'x'; break;
			case 'ng': C1 = 'ŋ'; break;
			case 'ngh': C1 = 'ŋ'; break;
			case 'nh': C1 = 'ɲ'; break;
			case 'ph': C1 = 'f'; break;
			case 'q': C1 = 'k'; break;
			case 'r': C1 = (region === 'n') ? 'z' : 'r'; break;
			case 's': C1 = (region === 'n') ? 's' : 'ʃ'; break;
			case 'th': C1 = 'tʰ'; break;
			case 'tr': C1 = (region === 'n') ? 'c' : 'ʈ'; break;
			case 'x': C1 = 's'; break;
			default: C1 = initialC;
		}

		V = '';
		tone = null;
		if (vowels) {
			decomposedV = decompose(vowels[0]);
			// find and store any tone mark, replacing with a marker
			tone = decomposedV.match(toneMark);
			if (tone) { decomposedV = decomposedV.replace(toneMark, '¶'); }

			// remove initial i if syllable starts with gi
			if (C1 === 'ʒ' && decomposedV.charAt(0) === 'i') { decomposedV = decomposedV.slice(1); }

			// A cluster that is not listed below is passed through as spelled instead of reusing
			// whatever the previous syllable produced.
			V = decomposedV;

			// Keys are decomposed spellings: \u0306 = breve, \u0302 = circumflex, \u031B = horn,
			// and the pilcrow marks where the tone mark sat. One switch covers every first letter,
			// so an empty cluster simply matches nothing.
			switch (decomposedV) {
				// a, a-breve, a-circumflex
				case 'a': V = 'āː'; break;
				case 'au': V = 'āw'; break;
				case 'ao': V = 'āːw'; break;
				case 'ai': V = 'āːj'; break;
				case 'ay': V = 'āj'; break;
				case 'a¶': V = 'a¶ː'; break;
				case 'a¶u': V = 'a¶w'; break;
				case 'a¶o': V = 'a¶ːw'; break;
				case 'a¶i': V = 'a¶ːj'; break;
				case 'a¶y': V = 'a¶j'; break;
				case 'a\u0306': V = 'ā'; break;
				case 'a\u0306¶': V = 'a¶'; break;
				case 'a\u0302': V = 'ə̄'; break;
				case 'a\u0302u': V = 'ə̄w'; break;
				case 'a\u0302i': V = 'ə̄j'; break;
				case 'a\u0302y': V = 'ə̄j'; break;
				case 'a\u0302¶': V = 'ə¶'; break;
				case 'a\u0302¶u': V = 'ə¶w'; break;
				case 'a\u0302¶i': V = 'ə¶j'; break;
				case 'a\u0302¶y': V = 'ə¶j'; break;

				// e, e-circumflex
				case 'e': V = 'ɛ̄'; break;
				case 'eo': V = 'ɛ̄w'; break;
				case 'e\u0302': V = 'ē'; break;
				case 'e\u0302u': V = 'ēw'; break;
				case 'e¶': V = 'ɛ¶'; break;
				case 'e¶o': V = 'ɛ¶w'; break;
				case 'e\u0302¶': V = 'e¶'; break;
				case 'e\u0302¶u': V = 'e¶w'; break;

				// i
				case 'i': V = 'ī'; break;
				case 'ia': V = 'īə'; break;
				case 'ie\u0302': V = 'īə'; break;
				case 'iu': V = 'īw'; break;
				case 'ie\u0302u': V = 'īəw'; break;
				case 'i¶': V = 'i¶'; break;
				case 'i¶a': V = 'i¶ə'; break;
				case 'ie\u0302¶': V = 'i¶ə'; break;
				case 'i¶u': V = 'i¶w'; break;
				case 'ie\u0302¶u': V = 'i¶əw'; break;

				// y
				case 'y': V = 'ī'; break;
				case 'ya': V = 'īə'; break;
				case 'ye\u0302': V = 'īə'; break;
				case 'yu': V = 'īw'; break;
				case 'ye\u0302u': V = 'īəw'; break;
				case 'y¶': V = 'i¶'; break;
				case 'y¶a': V = 'i¶ə'; break;
				case 'ye\u0302¶': V = 'i¶ə'; break;
				case 'y¶u': V = 'i¶w'; break;
				case 'ye\u0302¶u': V = 'i¶əw'; break;

				// o, o-circumflex, o-horn
				case 'o': V = 'ɔ̄'; break;
				case 'oi': V = 'ɔ̄j'; break;
				case 'oa': V = 'wāː'; break;
				case 'oa\u0306': V = 'wā'; break;
				case 'oe': V = 'wɛ̄'; break;
				case 'o¶': V = 'ɔ¶'; break;
				case 'o¶i': V = 'ɔ¶j'; break; // glide written j, matching the untoned 'oi' above
				case 'oa¶': V = 'wa¶ː'; break;
				case 'o¶a': V = 'wa¶ː'; break;
				case 'oa\u0306¶': V = 'wa¶'; break;
				case 'o¶a\u0306': V = 'wa¶'; break;
				case 'o¶e': V = 'wɛ¶'; break;
				case 'o\u0302': V = 'ō'; break;
				case 'o\u0302i': V = 'ōj'; break;
				case 'o\u0302¶': V = 'o¶'; break;
				case 'o\u0302¶i': V = 'o¶j'; break;
				case 'o\u031B': V = 'ə̄ː'; break;
				case 'o\u031Bi': V = 'ə̄ːj'; break;
				case 'o\u031Bu': V = 'ə̄ːw'; break;
				case 'o\u031B¶': V = 'ə¶ː'; break;
				case 'o\u031B¶i': V = 'ə¶ːj'; break;
				case 'o\u031B¶u': V = 'ə¶ːw'; break;

				// u
				case 'u': V = 'ū'; break;
				case 'ua': V = 'ūə'; break;
				case 'uo\u0302': V = 'ūə'; break;
				case 'ui': V = 'ūi'; break; // some ambiguity here, so i chose ui rather than wi or uj
				case 'uy': V = 'ūi'; break;
				case 'uo\u0302i': V = 'ūəj'; break;
				case 'uye\u0302': V = 'u̯īə'; break;
				case 'ue\u0302': V = 'wē'; break;
				case 'ua\u0302': V = 'wə̄'; break;
				case 'uo\u031B': V = 'wə̄ː'; break;
				case 'ue': V = 'wɛ̄'; break;
				case 'uya': V = 'uiə'; break; // guessing here
				case 'uyu': V = 'uiu'; break; // guessing here
				case 'ua\u0302y': V = 'uəj'; break; // guessing here
				case 'u¶': V = 'u¶'; break;
				case 'u¶a': V = 'u¶ə'; break;
				case 'uo\u0302¶': V = 'u¶ə'; break;
				case 'u¶i': V = 'u¶i'; break;
				case 'ui¶': V = 'u¶i'; break;
				case 'uy¶': V = 'u¶i'; break;
				case 'u¶y': V = 'u¶i'; break;
				case 'uo\u0302¶i': V = 'u¶əj'; break;
				case 'uy¶e\u0302':
				case 'uye\u0302¶': V = 'u̯i¶ə'; break; // tone written on the y or on the e-circumflex
				case 'ue\u0302¶': V = 'we¶'; break;
				case 'ua\u0302¶': V = 'wə¶'; break;
				case 'uo\u031B¶': V = 'wə¶ː'; break;
				case 'ue¶': V = 'wɛ¶'; break;
				case 'uy¶a': V = 'ui¶ə'; break; // guessing here
				case 'uy¶u': V = 'ui¶u'; break; // guessing here
				case 'ua\u0302¶y': V = 'uə¶j'; break; // guessing here

				// u-horn
				case 'u\u031B': V = 'ɯ̄'; break;
				case 'u\u031Ba': V = 'ɯ̄ə'; break;
				case 'u\u031Bo\u031B': V = 'ɯ̄ə'; break;
				case 'u\u031Bi': V = 'ɯ̄j'; break;
				case 'u\u031Bu': V = 'ɯ̄w'; break;
				case 'u\u031Bo\u031Bi': V = 'ɯ̄əj'; break;
				case 'u\u031B¶': V = 'ɯ¶'; break;
				case 'u\u031B¶a': V = 'ɯ¶ə'; break;
				case 'u\u031Bo\u031B¶': V = 'ɯ¶ə'; break;
				case 'u\u031B¶i': V = 'ɯ¶j'; break;
				case 'u\u031B¶u': V = 'ɯ¶w'; break;
				case 'u\u031Bo\u031B¶i': V = 'ɯ¶əj'; break;
			}

			// put back any tones
			if (tone) { V = V.replace(/¶/, toneToIPA[tone[0]]); }
		}

		// the final consonant depends on the last letter of the vowel cluster as spelled
		lastVowel = vowels ? vowels[0].charAt(vowels[0].length - 1) : '';
		switch (finalC) {
			case 'c':
				C2 = endsInOorU.test(lastVowel) ? 'k͡p' : 'k';
				break;
			case 'ch':
				if (endsInA.test(lastVowel)) { C2 = 'ik'; }
				else if (region === 'n') { C2 = 'c'; }
				else { C2 = 't'; }
				break;
			case 'n':
				C2 = (region === 'n') ? 'n' : 'ŋ';
				break;
			case 'nh':
				if (endsInA.test(lastVowel)) { C2 = 'iŋ'; }
				else if (region === 'n') { C2 = 'ɲ'; }
				else { C2 = 'n'; }
				break;
			case 'ng':
				C2 = endsInOorU.test(lastVowel) ? 'ŋ͡m' : 'ŋ';
				break;
			case 't':
				C2 = (region === 'n') ? 't' : 'k';
				break;
			default: C2 = finalC;
		}

		outstr += C1 + V + C2 + ' ';
	}

	document.getElementById('transcribe').style.display = 'block';
	document.getElementById('transcribe').value = outstr;
	return outstr;
}