// MetaphoneString.h #ifndef MetaphoneStringh #define MetaphoneStringh #if _MSC_VER > 1000 #pragma once #endif // _MSC_VER > 1000 class CMetaphoneString : public CString { int length, last; bool alternate; CString primary, secondary; public: CMetaphoneString() {} CMetaphoneString(const char* in) : CString(in) {} CMetaphoneString(const CString& in) : CString(in) {} bool SlavoGermanic() {return (Find('W')>-1) || (Find('K')>-1) || (Find("CZ")>-1) || (Find("WITZ")>-1);} void MetaphAdd(const char* main) { if(*main) { primary+=main; secondary+=main; } } void MetaphAdd(const char* main, const char* alt) { if(*main) primary+=main; if(*alt) { alternate=true; if(*alt!=' ') secondary+=alt; }else if(*main && (*main!=' ')) secondary+=main; } bool IsVowel(int at) { if((at<0) || (at>=length)) return false; char it=GetAt(at); static const CString Vowels("AEIOU"); return Vowels.Find(it)!=-1; } bool StringAt(int start, int length, ...) { if(start<0) return false; char buffer[64]; char* test; CString target; test=buffer; target=Mid(start, length); va_list sstrings; va_start(sstrings, length); do { test=va_arg(sstrings, char*); if(*test && (target==test)) return true; }while(strcmp(test, "")); va_end(sstrings); return false; } void CMetaphoneString::DoubleMetaphone(CString& metaph, CString& metaph2) { // Alters metaph and metaph2 int current=0; length=GetLength(); if(length<1) return; last=length-1; // zero based index alternate=false; MakeUpper(); Insert(GetLength(), " "); //pad the original string so that we can index beyond the edge of the world if(StringAt(0, 2, "GN", "KN", "PN", "WR", "PS", "")) ++current; // skip these when at start of word if(GetAt(0)=='X') { // Initial 'X' is pronounced 'Z' e.g. 'Xavier' MetaphAdd("S"); //'Z' maps to 'S' ++current; } while(true || (primary.GetLength()<4) || (secondary.GetLength()<4)) { if(current>=length) break; switch(GetAt(current)) { case 'A': case 'E': case 'I': case 'O': case 'U': case 'Y': if(current==0) MetaphAdd("A"); // all init vowels now map to 'A' ++current; break; case 'B': //"-mb", e.g", "dumb", already skipped over... MetaphAdd("P"); if(GetAt(++current)=='B') ++current; break; case 'Ç': MetaphAdd("S"); ++current; break; case 'C': //various germanic if((current>1) && !IsVowel(current-2) && StringAt((current-1), 3, "ACH", "") && ((GetAt(current+2) != 'I') && ((GetAt(current+2) != 'E') || StringAt((current-2), 6, "BACHER", "MACHER", "")))) { MetaphAdd("K"); current+=2; break; } if((current==0) && StringAt(current, 6, "CAESAR", "")) { // special case 'caesar' MetaphAdd("S"); current+=2; break; } if(StringAt(current, 4, "CHIA", "")) { //italian 'chianti' MetaphAdd("K"); current+=2; break; } if(StringAt(current, 2, "CH", "")) { if((current>0) && StringAt(current, 4, "CHAE", "")) { //find 'michael' MetaphAdd("K", "X"); current +=2; break; } if((current==0) //greek roots e.g. 'chemistry', 'chorus': && (StringAt((current+1), 5, "HARAC", "HARIS", "") || StringAt((current+1), 3, "HOR", "HYM", "HIA", "HEM", "")) && !StringAt(0, 5, "CHORE", "")) { MetaphAdd("K"); current+=2; break; } //germanic, greek, or otherwise 'ch' for 'kh' sound if((StringAt(0, 4, "VAN ", "VON ", "") || StringAt(0, 3, "SCH", "")) || StringAt((current-2), 6, "ORCHES", "ARCHIT", "ORCHID", "") // 'architect but not 'arch', 'orchestra', 'orchid' || StringAt((current+2), 1, "T", "S", "") || ((StringAt((current-1), 1, "A", "O", "U", "E", "") || (current==0)) //e.g., 'wachtler', 'wechsler', but not 'tichner' && StringAt((current+2), 1, "L", "R", "N", "M", "B", "H", "F", "V", "W", " ", ""))) { MetaphAdd("K"); }else{ if(current>0) { if(StringAt(0, 2, "MC", "")) MetaphAdd("K"); //e.g., "McHugh" else MetaphAdd("X", "K"); }else MetaphAdd("X"); } current+=2; break; } if(StringAt(current, 2, "CZ", "") && !StringAt((current-2), 4, "WICZ", "")) { //e.g, 'czerny' MetaphAdd("S", "X"); current+=2; break; } if(StringAt((current+1), 3, "CIA", "")) { //e.g., 'focaccia' MetaphAdd("X"); current+=3; break; } if(StringAt(current, 2, "CC", "") && !((current==1) && (GetAt(0)=='M'))) { //double 'C', but not if e.g. 'McClellan' if(StringAt((current+2), 1, "I", "E", "H", "") && !StringAt((current+2), 2, "HU", "")) { //'bellocchio' but not 'bacchus' if(((current==1) && (GetAt(current-1)=='A')) //'accident', 'accede' 'succeed' || StringAt((current-1), 5, "UCCEE", "UCCES", "")) MetaphAdd("KS"); else MetaphAdd("X"); //'bacci', 'bertucci', other italian current+=3; break; }else{ //Pierce's rule MetaphAdd("K"); current+=2; break; } } if(StringAt(current, 2, "CK", "CG", "CQ", "")) { MetaphAdd("K"); current+=2; break; } if(StringAt(current, 2, "CI", "CE", "CY", "")) { //italian vs. english if(StringAt(current, 3, "CIO", "CIE", "CIA", "")) MetaphAdd("S", "X"); else MetaphAdd("S"); current+=2; break; } //else MetaphAdd("K"); if(StringAt((current+1), 2, " C", " Q", " G", "")) current+=3; //name sent in 'mac caffrey', 'mac gregor else{ if(StringAt((current+1), 1, "C", "K", "Q", "") && !StringAt((current+1), 2, "CE", "CI", "")) current+=2; else ++current; } break; case 'D': if(StringAt(current, 2, "DG", "")) { if(StringAt((current+2), 1, "I", "E", "Y", "")) { //e.g. 'edge' MetaphAdd("J"); current+=3; break; }else{ //e.g. 'edgar' MetaphAdd("TK"); current+=2; break; } } if(StringAt(current, 2, "DT", "DD", "")) { MetaphAdd("T"); current+=2; break; } //else MetaphAdd("T"); ++current; break; case 'F': if(GetAt(++current)=='F') ++current; MetaphAdd("F"); break; case 'G': if(GetAt(current+1)=='H') { if((current>0) && !IsVowel(current-1)) { MetaphAdd("K"); current+=2; break; } if(current==0) { //'ghislane', ghiradelli if(GetAt(current+2)=='I') MetaphAdd("J"); else MetaphAdd("K"); current+=2; break; } //Parker's rule (with some further refinements)-e.g., 'hugh' if(((current>1) && StringAt((current-2), 1, "B", "H", "D", "")) || ((current>2) && StringAt((current-3), 1, "B", "H", "D", "")) //e.g., 'bough' || ((current>3) && StringAt((current-4), 1, "B", "H", ""))) { //e.g., 'broughton' current+=2; break; }else{ //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough' if((current>2) && (GetAt(current-1)=='U') && StringAt((current-3), 1, "C", "G", "L", "R", "T", "")) { MetaphAdd("F"); }else if((current>0) && GetAt(current-1) != 'I') MetaphAdd("K"); current+=2; break; } } if(GetAt(current+1)=='N') { if((current==1) && IsVowel(0) && !SlavoGermanic()) { MetaphAdd("KN", "N"); }else { if(!StringAt((current+2), 2, "EY", "") //not e.g. 'cagney' && (GetAt(current+1) != 'Y') && !SlavoGermanic()) { MetaphAdd("N", "KN"); }else MetaphAdd("KN"); } current+=2; break; } if(StringAt((current+1), 2, "LI", "") && !SlavoGermanic()) { //'tagliaro' MetaphAdd("KL", "L"); current+=2; break; } if((current==0) //-ges-,-gep-,-gel-, -gie- at beginning && ((GetAt(current+1)=='Y') || StringAt((current+1), 2, "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", ""))) { MetaphAdd("K", "J"); current+=2; break; } if((StringAt((current+1), 2, "ER", "") || (GetAt(current+1)=='Y')) // -ger-, -gy- && !StringAt(0, 6, "DANGER", "RANGER", "MANGER", "") && !StringAt((current-1), 1, "E", "I", "") && !StringAt((current-1), 3, "RGY", "OGY", "")) { MetaphAdd("K", "J"); current+=2; break; } if(StringAt((current+1), 1, "E", "I", "Y", "") || StringAt((current-1), 4, "AGGI", "OGGI", "")) { // italian e.g, 'biaggi' if((StringAt(0, 4, "VAN ", "VON ", "") || StringAt(0, 3, "SCH", "")) //obvious germanic || StringAt((current+1), 2, "ET", "")) { MetaphAdd("K"); }else{ //always soft if french ending if(StringAt((current+1), 4, "IER ", "")) MetaphAdd("J"); else MetaphAdd("J", "K"); } current+=2; break; } if(GetAt(++current)=='G') ++current; MetaphAdd("K"); break; case 'H': if(((current==0) || IsVowel(current-1)) //only keep if first & before vowel or btw. 2 vowels && IsVowel(current+1)) { MetaphAdd("H"); current+=2; }else ++current; //also takes care of 'HH' break; case 'J': if(StringAt(current, 4, "JOSE", "") || StringAt(0, 4, "SAN ", "")) { //obvious spanish, 'jose', 'san jacinto' if(((current==0) && (GetAt(current+4)==' ')) || StringAt(0, 4, "SAN ", "")) MetaphAdd("H"); else MetaphAdd("J", "H"); ++current; break; } if((current==0) && !StringAt(current, 4, "JOSE", "")) MetaphAdd("J", "A");//Yankelovich/Jankelowicz else { if(IsVowel(current-1) //spanish pron. of e.g. 'bajador' && !SlavoGermanic() && ((GetAt(current+1)=='A') || (GetAt(current+1)=='O'))) MetaphAdd("J", "H"); else { if(current==last) MetaphAdd("J", " "); else{ if(!StringAt((current+1), 1, "L", "T", "K", "S", "N", "M", "B", "Z", "") && !StringAt((current-1), 1, "S", "K", "L", "")) MetaphAdd("J"); } } } if(GetAt(++current)=='J') ++current; //Buddhist, Islamic eg.'avijja', 'hajjes' break; case 'K': if(GetAt(++current)=='K') ++current; MetaphAdd("K"); break; case 'L': if(GetAt(current+1)=='L') { if(((current==(length-3)) //spanish e.g. 'cabrillo', 'gallegos' && StringAt((current-1), 4, "ILLO", "ILLA", "ALLE", "")) || ((StringAt((last-1), 2, "AS", "OS", "") || StringAt(last, 1, "A", "O", "")) && StringAt((current-1), 4, "ALLE", ""))) { MetaphAdd("L", " "); current+=2; break; } current+=2; }else ++current; MetaphAdd("L"); break; case 'M': if((StringAt((current-1), 3, "UMB", "") && (((current+1)==last) || StringAt((current+2), 2, "ER", ""))) || (GetAt(current+1)=='M')) //'dumb','thumb' current+=2; else ++current; MetaphAdd("M"); break; case 'N': if(GetAt(++current)=='N') ++current; MetaphAdd("N"); break; case 'Ñ': ++current; MetaphAdd("N"); break; case 'P': if(GetAt(current+1)=='H') { MetaphAdd("F"); current+=2; break; } if(StringAt((++current), 1, "P", "B", "")) ++current; //also account for "campbell", "raspberry" MetaphAdd("P"); break; case 'Q': if(GetAt(++current)=='Q') ++current; MetaphAdd("K"); break; case 'R': if((current==last) //french e.g. 'rogier', but exclude 'hochmeier' && !SlavoGermanic() && StringAt((current-2), 2, "IE", "") && !StringAt((current-4), 2, "ME", "MA", "")) MetaphAdd("", "R"); else MetaphAdd("R"); if(GetAt(++current)=='R') ++current; break; case 'S': if(StringAt((current-1), 3, "ISL", "YSL", "")) { //special cases 'island', 'isle', 'carlisle', 'carlysle' ++current; break; } if((current==0) && StringAt(current, 5, "SUGAR", "")) { //special case 'sugar-' MetaphAdd("X", "S"); ++current; break; } if(StringAt(current, 2, "SH", "")) { //germanic if(StringAt((current+1), 4, "HEIM", "HOEK", "HOLM", "HOLZ", "")) MetaphAdd("S"); else MetaphAdd("X"); current+=2; break; } //italian & armenian if(StringAt(current, 3, "SIO", "SIA", "") || StringAt(current, 4, "SIAN", "")) { if(!SlavoGermanic()) MetaphAdd("S", "X"); else MetaphAdd("S"); current+=3; break; } //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider' //also, -sz- in slavic language altho in hungarian it is pronounced 's' if(((current==0) && StringAt((current+1), 1, "M", "N", "L", "W", "")) || StringAt((current+1), 1, "Z", "")) { MetaphAdd("S", "X"); if(StringAt((++current), 1, "Z", "")) ++current; break; } if(StringAt(current, 2, "SC", "")) { if(GetAt(current+2)=='H') { //Schlesinger's rule if(StringAt((current+3), 2, "OO", "ER", "EN", "UY", "ED", "EM", "")) { //dutch origin, e.g. 'school', 'schooner' //'schermerhorn', 'schenker' if(StringAt((current+3), 2, "ER", "EN", "")) MetaphAdd("X", "SK"); else MetaphAdd("SK"); current+=3; break; }else{ if((current==0) && !IsVowel(3) && (GetAt(3) != 'W')) MetaphAdd("X", "S"); else MetaphAdd("X"); current+=3; break; } } if(StringAt((current+2), 1, "I", "E", "Y", "")) { MetaphAdd("S"); current+=3; break; } //else MetaphAdd("SK"); current+=3; break; } //french e.g. 'resnais', 'artois' if((current==last) && StringAt((current-2), 2, "AI", "OI", "")) MetaphAdd("", "S"); else MetaphAdd("S"); if(StringAt((++current), 1, "S", "Z", "")) ++current; break; case 'T': if(StringAt(current, 4, "TION", "")) { MetaphAdd("X"); current+=3; break; } if(StringAt(current, 3, "TIA", "TCH", "")) { MetaphAdd("X"); current+=3; break; } if(StringAt(current, 2, "TH", "") || StringAt(current, 3, "TTH", "")) { //special case 'thomas', 'thames' or germanic if(StringAt((current+2), 2, "OM", "AM", "") || StringAt(0, 4, "VAN ", "VON ", "") || StringAt(0, 3, "SCH", "")) MetaphAdd("T"); else MetaphAdd("0", "T"); current+=2; break; } if(StringAt((++current), 1, "T", "D", "")) ++current; MetaphAdd("T"); break; case 'V': if(GetAt(++current)=='V') ++current; MetaphAdd("F"); break; case 'W': if(StringAt(current, 2, "WR", "")) { //can also be in middle of word MetaphAdd("R"); current+=2; break; } if((current==0) && (IsVowel(current+1) || StringAt(current, 2, "WH", ""))) { //Wasserman should match Vasserman if(IsVowel(current+1)) MetaphAdd("A", "F"); else MetaphAdd("A"); //need Uomo to match Womo } //Arnow should match Arnoff if(((current==last) && IsVowel(current-1)) || StringAt((current-1), 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY", "") || StringAt(0, 3, "SCH", "")) { MetaphAdd("", "F"); ++current; break; } //polish e.g. 'filipowicz' if(StringAt(current, 4, "WICZ", "WITZ", "")) { MetaphAdd("TS", "FX"); current +=4; break; } //else skip it ++current; break; case 'X': //french e.g. breaux if(!((current==last) && (StringAt((current-3), 3, "IAU", "EAU", "") || StringAt((current-2), 2, "AU", "OU", "")))) MetaphAdd("KS"); if(StringAt((++current), 1, "C", "X", "")) ++current; break; case 'Z': //chinese pinyin e.g. 'zhao' if(GetAt(current+1)=='H') { MetaphAdd("J"); current+=2; break; }else { if(StringAt((current+1), 2, "ZO", "ZI", "ZA", "") || (SlavoGermanic() && ((current>0) && GetAt(current-1) != 'T'))) MetaphAdd("S", "TS"); else MetaphAdd("S"); } if(GetAt(++current)=='Z') ++current; break; default: ++current; } } metaph=primary; // if(metaph.GetLength()>4) metaph.SetAt(4,'\0'); // only give back 4 char metaph if(alternate) { metaph2=secondary; // if(metaph2.GetLength()>4) metaph2.SetAt(4,'\0'); // only give back 4 char metaph }else metaph2.Empty(); } }; #endif // ndef MetaphoneStringh