Grammalecte  Diff

Differences From Artifact [b008c1ffec]:

To Artifact [b0152aab01]:


16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
...
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
...
198
199
200
201
202
203
204
205
206
207
208
209
210
211

def cleanWord (sWord):
  "simplify a word (lowercase, char translation via _xTransChars, then 'eau'/'au' -> 'o') before calculating the distance between words"
  sSimplified = sWord.lower().translate(_xTransChars)
  # "eau" must be replaced before "au", otherwise "eau" would become "eo"
  sSimplified = sSimplified.replace("eau", "o")
  return sSimplified.replace("au", "o")


# Letter classes (lowercase and uppercase forms)
aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
# "e"/"E" are vowels (see aVowel) and must not be listed here; "ç"/"Ç" are consonants
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ") # letters that may be used twice successively


# Similar chars

d1to1 = {
  "1": "liîLIÎ",
  "2": "zZ",
................................................................................
d1toX = {
  "æ": ("ae",),
  "Æ": ("AE",),
  "b": ("bb",),
  "B": ("BB",),
  "c": ("cc", "ss", "qu", "ch"),
  "C": ("CC", "SS", "QU", "CH"),
  "ç": ("ss", "cc", "qh", "ch"),
  "Ç": ("SS", "CC", "QH", "CH"),
  "d": ("dd",),
  "D": ("DD",),
  "é": ("ai", "ei"),
  "É": ("AI", "EI"),
  "è": ("ai", "ei"),
  "È": ("AI", "EI"),
  "ê": ("ai", "ei"),
  "Ê": ("AI", "EI"),
  "ë": ("ai", "ei"),
  "Ë": ("AI", "EI"),
  "f": ("ff", "ph"),
  "F": ("FF", "PH"),
  "g": ("gu", "ge", "gg", "gh"),
  "G": ("GU", "GE", "GG", "GH"),
  "j": ("jj", "dj"),
  "J": ("JJ", "DJ"),
  "k": ("qu", "ck", "ch", "cu", "kk", "kh"),
................................................................................
  "t": ("tt", "th"),
  "T": ("TT", "TH"),
  "x": ("cc", "ct", "xx"),
  "X": ("CC", "CT", "XX"),
  "z": ("ss", "zh"),
  "Z": ("SS", "ZH"),
}
d2toX = {
  "an": ("en",),
  "AN": ("EN",),
  "au": ("eau", "o", "ô"),
  "AU": ("EAU", "O", "Ô"),
  "en": ("an",),|
| <
<
<
<
<
<
<
< >
>
>
>
>
>
>16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
...
153
154
155
156
157
158
159


160
161
162
163


164
165
166
167
168
169
170
...
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210

def cleanWord (sWord):
  "simplify a word (lowercase, char translation via _xTransChars, then 'eau'/'au' -> 'o') before calculating the distance between words"
  sSimplified = sWord.lower().translate(_xTransChars)
  # "eau" must be replaced before "au", otherwise "eau" would become "eo"
  sSimplified = sSimplified.replace("eau", "o")
  return sSimplified.replace("au", "o")


# Vowels, grouped by base letter (with accented forms), then ligatures; lowercase then uppercase
aVowel = set(
  "aáàâäā" "eéèêëē" "iíìîïī" "oóòôöō" "uúùûüū" "yýỳŷÿȳ" "œæ"
  "AÁÀÂÄĀ" "EÉÈÊËĒ" "IÍÌÎÏĪ" "OÓÒÔÖŌ" "UÚÙÛÜŪ" "YÝỲŶŸȲ" "ŒÆ"
)
# Consonants; lowercase then uppercase
aConsonant = set(
  "bcçdfghjklmnñpqrstvwxz"
  "BCÇDFGHJKLMNÑPQRSTVWXZ"
)
# Letters that may be used twice successively; lowercase then uppercase
aDouble = set(
  "bcçdfjklmnprstz"
  "BCÇDFJKLMNPRSTZ"
)


# Similar chars

d1to1 = {
  "1": "liîLIÎ",
  "2": "zZ",
................................................................................
d1toX = {
  "æ": ("ae",),
  "Æ": ("AE",),
  "b": ("bb",),
  "B": ("BB",),
  "c": ("cc", "ss", "qu", "ch"),
  "C": ("CC", "SS", "QU", "CH"),


  "d": ("dd",),
  "D": ("DD",),
  "é": ("ai", "ei"),
  "É": ("AI", "EI"),


  "f": ("ff", "ph"),
  "F": ("FF", "PH"),
  "g": ("gu", "ge", "gg", "gh"),
  "G": ("GU", "GE", "GG", "GH"),
  "j": ("jj", "dj"),
  "J": ("JJ", "DJ"),
  "k": ("qu", "ck", "ch", "cu", "kk", "kh"),
................................................................................
  "t": ("tt", "th"),
  "T": ("TT", "TH"),
  "x": ("cc", "ct", "xx"),
  "X": ("CC", "CT", "XX"),
  "z": ("ss", "zh"),
  "Z": ("SS", "ZH"),
}


def get1toXReplacement (cPrev, cCur, cNext):
  "return the d1toX replacements for <cCur>, or an empty tuple if <cCur> is a consonant next to another consonant (<cPrev> or <cNext>)"
  # replacements are allowed when <cCur> is not a consonant, or when neither neighbour is one
  bAllowed = cCur not in aConsonant or (cPrev not in aConsonant and cNext not in aConsonant)
  return d1toX.get(cCur, ()) if bAllowed else ()


d2toX = {
  "an": ("en",),
  "AN": ("EN",),
  "au": ("eau", "o", "ô"),
  "AU": ("EAU", "O", "Ô"),
  "en": ("an",),