The Soundex Algorithm (DE)

And for my German colleagues, here is a version of the Soundex algorithm suited to German pronunciation. It is based on the Cologne phonetics (Kölner Phonetik) algorithm. More information can be found here: [http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik]

Soundex

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.Text.RegularExpressions;

namespace Soundex
{
    /// <summary>
    /// Phonetic encoder for German words, following the Cologne phonetics
    /// (Kölner Phonetik) conversion table.
    /// </summary>
    public class SoundexClass
    {
        // Everything except ASCII letters and spaces is dropped before encoding.
        private static readonly Regex NonLetterPattern = new Regex(@"[^A-Za-z ]+");

        /// <summary>
        /// Computes the Cologne-phonetics code of a single word.
        /// </summary>
        /// <param name="word">The word to encode; case is ignored.</param>
        /// <returns>
        /// A string of digits, or an empty string when <paramref name="word"/> is
        /// null, empty, or contains no letter that produces a code.
        /// </returns>
        public static string createSoundexCodeDE(string word)
        {
            if (string.IsNullOrEmpty(word))
            {
                return "";
            }

            word = Normalize(word);

            // The input may have consisted solely of digits or punctuation.
            if (word.Length == 0)
            {
                return "";
            }

            StringBuilder code = new StringBuilder();
            int index = 0;

            // Special case: "c" as the initial sound.
            // Before a, h, k, l, o, q, r, u, x it is coded 4, otherwise 8.
            if (word[0] == 'c')
            {
                char second = word.Length > 1 ? word[1] : ' ';
                code.Append("ahkloqrux".IndexOf(second) >= 0 ? '4' : '8');
                index = 1;
            }

            // Conversion table:
            // ======================================================
            // Letter          Context                          Code
            // --------------  -------------------------------  ----
            // A,E,I,J,O,U,Y                                    0
            // H                                                -
            // B                                                1
            // P               not before H                     1
            // D,T             not before C,S,Z                 2
            // F,V,W                                            3
            // P               before H                         3
            // G,K,Q                                            4
            // C               initial, before A,H,K,L,O,Q,R,U,X 4
            // C               before A,H,K,O,Q,U,X,
            //                 except after S,Z                 4
            // X               not after C,K,Q                  48
            // L                                                5
            // M,N                                              6
            // R                                                7
            // S,Z                                              8
            // C               after S,Z                        8
            // C               initial, not before
            //                 A,H,K,L,O,Q,R,U,X                8
            // C               not before A,H,K,O,Q,U,X         8
            // D,T             before C,S,Z                     8
            // X               after C,K,Q                      8
            // ------------------------------------------------------
            for (; index < word.Length; index++)
            {
                // '\0' marks "no neighbour"; it matches none of the context letters.
                char previous = index > 0 ? word[index - 1] : '\0';
                char next = index + 1 < word.Length ? word[index + 1] : '\0';
                AppendCode(code, word[index], previous, next);
            }

            return RemoveZerosAndRepeats(code.ToString());
        }

        // Lower-cases the word, folds accented and equivalent letters onto their
        // base letter, and strips everything that is not an ASCII letter or space.
        private static string Normalize(string word)
        {
            // Invariant casing keeps the result independent of the current culture.
            // Mapping: ç->c, v->f, w->f, j->i, y->i, ph->f, ä->a, ö->o, ü->u, ß->ss,
            // é->e, è->e, ê->e, à->a, á->a, â->a, ë->e
            string normalized = word.ToLowerInvariant()
                .Replace("ç", "c").Replace("v", "f").Replace("w", "f")
                .Replace("j", "i").Replace("y", "i").Replace("ph", "f")
                .Replace("ä", "a").Replace("ö", "o").Replace("ü", "u")
                .Replace("ß", "ss").Replace("é", "e").Replace("è", "e")
                .Replace("ê", "e").Replace("à", "a").Replace("á", "a")
                .Replace("â", "a").Replace("ë", "e");

            return NonLetterPattern.Replace(normalized, "");
        }

        // Appends the code for one letter given its left and right neighbours.
        // Letters without an entry (h, space) contribute nothing.
        private static void AppendCode(StringBuilder code, char current, char previous, char next)
        {
            switch (current)
            {
                case 'a':
                case 'e':
                case 'i':
                case 'o':
                case 'u':
                    code.Append('0');
                    break;

                case 'b':
                case 'p':
                    // "ph" was already folded to "f" during normalization.
                    code.Append('1');
                    break;

                case 'd':
                case 't':
                    code.Append(next == 'c' || next == 's' || next == 'z' ? '8' : '2');
                    break;

                case 'f':
                    code.Append('3');
                    break;

                case 'g':
                case 'k':
                case 'q':
                    code.Append('4');
                    break;

                case 'c':
                    bool hardContext = "ahkoqux".IndexOf(next) >= 0;
                    bool afterSibilant = previous == 's' || previous == 'z';
                    code.Append(hardContext && !afterSibilant ? '4' : '8');
                    break;

                case 'x':
                    if (previous == 'c' || previous == 'k' || previous == 'q')
                    {
                        code.Append('8');
                    }
                    else
                    {
                        code.Append("48");
                    }
                    break;

                case 'l':
                    code.Append('5');
                    break;

                case 'm':
                case 'n':
                    code.Append('6');
                    break;

                case 'r':
                    code.Append('7');
                    break;

                case 's':
                case 'z':
                    code.Append('8');
                    break;
            }
        }

        // Drops every "0" except one in the first position, then collapses runs of
        // identical digits into a single digit.
        private static string RemoveZerosAndRepeats(string code)
        {
            if (code.Length == 0)
            {
                return "";
            }

            StringBuilder result = new StringBuilder(code.Length);
            result.Append(code[0]);

            for (int i = 1; i < code.Length; i++)
            {
                char digit = code[i];
                if (digit == '0')
                {
                    continue;
                }

                // Compare with the last digit kept, so a leading "0" is never
                // confused with the first non-zero digit.
                if (digit != result[result.Length - 1])
                {
                    result.Append(digit);
                }
            }

            return result.ToString();
        }
    }
}

Note: I’ve converted the above code to .NET from PHP, so please test it thoroughly before putting it into production. The original PHP script can be found here: [https://github.com/deezaster/germanphonetic/blob/master/x3m_soundex_ger.php]

The new X++ required to populate the Soundex Table is as follows:

Job to bulk populate Soundex Table (names + addresses)

// Job that rebuilds the German ("de") rows of the Soundex table.
// It deletes the existing "de" rows, then inserts one row per accepted word
// found in DirPartyTable.Name and in LogisticsPostalAddress.Address.
// A word is accepted when it matches objRegex1 and is not in the noise list.
static void PopulateSoundexDE(Args _args)
{
    DirPartyTable objDirPartyTable;
    LogisticsPostalAddress objLogisticsPostalAddress;
    Soundex objSoundex;
    List list;
    ListIterator iterator;
    str word, text, code;
    int position;

    // noise words for entities (DE)
    container noiseWords1 = ["???", "???", "???"];

    // noise word list for addresses (DE)
    container noiseWords2 = ["???", "???", "???"];

    // format for a valid word: one upper-case ASCII letter followed by one or
    // more lower-case ASCII letters
    // NOTE(review): this rejects words containing umlauts or ß before they reach
    // the encoder - confirm that is intended for German data.
    System.Text.RegularExpressions.Regex objRegex1 = new System.Text.RegularExpressions.Regex("^[A-Z][a-z]+$");

    // clear the German rows of the [Soundex] table
    delete_from objSoundex where objSoundex.LanguageId == "de";

    // process entity names
    while select RecId, Name from objDirPartyTable
    {
        text = objDirPartyTable.Name;

        // replace any punctuation and formatting with a word separator
        text = strReplace(text, '\n', ' ');
        text = strReplace(text, '-', ' ');
        text = strReplace(text, '/', ' ');
        text = strReplace(text, ',', ' ');
        text = strReplace(text, '.', ' ');

        // split text on space
        list = Global::strSplit(text, " ");
        iterator = new ListIterator(list);

        // position counts every token, including the rejected ones
        position = 1;

        while (iterator.more())
        {
            word = iterator.value();

            if (objRegex1.IsMatch(word) && conFind(noiseWords1, word) == 0)
            {
                objSoundex.initValue();
                objSoundex.ContextTableId = tableName2id("DirPartyTable");
                objSoundex.ContextRecId = objDirPartyTable.RecId;
                objSoundex.LanguageId = "de";
                objSoundex.Position = position;
                objSoundex.Word = word;

                code = Soundex.SoundexClass::createSoundexCodeDE(word);

                // NOTE(review): str2int drops a leading "0" from the code, so
                // "065" and "65" are stored as the same value - confirm this is
                // acceptable for the SoundexCode field.
                objSoundex.SoundexCode = str2int(code);
                objSoundex.insert();
            }

            position++;
            iterator.next();
        }
    }

    // process entity addresses
    while select RecId, Address from objLogisticsPostalAddress
    {
        text = objLogisticsPostalAddress.Address;

        // replace any punctuation and formatting with a word separator
        text = strReplace(text, '\n', ' ');
        text = strReplace(text, '-', ' ');
        text = strReplace(text, '/', ' ');
        text = strReplace(text, ',', ' ');
        text = strReplace(text, '.', ' ');

        // split text on space
        list = Global::strSplit(text, " ");
        iterator = new ListIterator(list);

        // position counts every token, including the rejected ones
        position = 1;

        while (iterator.more())
        {
            word = iterator.value();

            if (objRegex1.IsMatch(word) && conFind(noiseWords2, word) == 0)
            {
                objSoundex.initValue();
                objSoundex.ContextTableId = tableName2id("LogisticsPostalAddress");
                objSoundex.ContextRecId = objLogisticsPostalAddress.RecId;
                objSoundex.LanguageId = "de";
                objSoundex.Position = position;
                objSoundex.Word = word;

                code = Soundex.SoundexClass::createSoundexCodeDE(word);

                // NOTE(review): str2int drops a leading "0" from the code - see
                // the same note in the entity-name loop.
                objSoundex.SoundexCode = str2int(code);
                objSoundex.insert();
            }

            position++;
            iterator.next();
        }
    }
}

REGARDS

The Soundex Algorithm (DE)

Trending Articles

Practice Sheet of Right form of verbs for HSC Students

Download: FK ft Shenky – Nakuyewa ”Prod by: Shenky”

How to win at Markstrat (Markstrat Tips and Tricks) – Vodites

Ominde Commission Report and Recommendations – Ominde Report of 1964

Bureau of Internal Revenue: Regional Offices (Directory)

GO 53 on Enhancement of Ex-gratia upto 5 Lakhs Toddy Tappers in Telangana

Cakewalk CA-2A Leveling Amplifier v2.0.1.97 WiN, v2.0.1.96 OSX Incl Keygen

Mp3 Download: Mdu - Kunjenjenjena

How the kill the job , when DTP request running for long hours.

Microsoft Intune から展開しているアプリのアップデートについて

18-year-old girl was beaten for half an hour by two Northampton men in 'an...

Car crash in Dunton Bassett leaves driver in critical condition

Macky 2, Two Others In Road Accident

Application log 00000000000000089514: Could not convert queue DLVST90CLNT

Detroit mafia: D’Anna Brothers agree to plea deal

Delivery block field greyed out using VA02

Muloraki Au

【個人撮影】スマホのプライベート映像♪「中に出さないで///」カラオケ屋での生ハメ撮りが流出ｗ【リベンジポルノ】＠PornHub

BREAKING NEWS: Diamond Platnumz Is Reported Dead After Ghastly Car Accident

FIAT 500 B0111 B0112