Grammalecte  Check-in [767e396f2d]

Overview
Comment:[core] ibdawg: suggestion mechanism > reduce 1toX replacements overload (much, much faster)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | spellsugg
Files: files | file ages | folders
SHA3-256: 767e396f2db4db56a9299f004eef88e9168f0dd2de411ec3a7890e1400128d7e
User & Date: olr on 2017-11-09 11:56:13
Original Comment: [core] ibdawg: suggestion mechanisme > reduce 1toX replacements overload (much, much faster)
Other Links: branch diff | manifest | tags
Context
2017-11-21
16:57
[core] better suggestion engine Closed-Leaf check-in: 6c5050fe91 user: olr tags: core, spellsugg
2017-11-10
16:52
[core] merge spellsugg: much faster suggestion engine check-in: e6e44e506c user: olr tags: core, trunk
2017-11-09
11:56
[core] ibdawg: suggestion mechanism > reduce 1toX replacements overload (much, much faster) check-in: 767e396f2d user: olr tags: core, spellsugg
2017-11-08
21:16
[core] ibdawg: update char_player check-in: 51e3a2e76e user: olr tags: core, spellsugg
Changes

Modified gc_core/js/char_player.js from [ac345212e4] to [0547b59e35].

23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
...
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
...
205
206
207
208
209
210
211







212
213
214
215
216
217
218
        for (let c of sWord) {
            sRes += this._dTransChars.gl_get(c, c);
        }
        return sRes.replace("eau", "o").replace("au", "o");
    },

    aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"),
    aConsonant: new Set("bcdefghjklmnñpqrstvwxzBCDEFGHJKLMNÑPQRSTVWXZ"),
    aDouble: new Set("bcdfjklmnprstzBCDFJKLMNPRSTZ"),  // letter that may be used twice successively


    // Similar chars

    d1to1: new Map([
        ["1", "liîLIÎ"],
        ["2", "zZ"],
................................................................................
    d1toX: new Map([
        ["æ", ["ae",]],
        ["Æ", ["AE",]],
        ["b", ["bb",]],
        ["B", ["BB",]],
        ["c", ["cc", "ss", "qu", "ch"]],
        ["C", ["CC", "SS", "QU", "CH"]],
        ["ç", ["ss", "cc", "qh", "ch"]],
        ["Ç", ["SS", "CC", "QH", "CH"]],
        ["d", ["dd",]],
        ["D", ["DD",]],
        ["é", ["ai", "ei"]],
        ["É", ["AI", "EI"]],
        ["è", ["ai", "ei"]],
        ["È", ["AI", "EI"]],
        ["ê", ["ai", "ei"]],
        ["Ê", ["AI", "EI"]],
        ["ë", ["ai", "ei"]],
        ["Ë", ["AI", "EI"]],
        ["f", ["ff", "ph"]],
        ["F", ["FF", "PH"]],
        ["g", ["gu", "ge", "gg", "gh"]],
        ["G", ["GU", "GE", "GG", "GH"]],
        ["j", ["jj", "dj"]],
        ["J", ["JJ", "DJ"]],
        ["k", ["qu", "ck", "ch", "cu", "kk", "kh"]],
................................................................................
        ["t", ["tt", "th"]],
        ["T", ["TT", "TH"]],
        ["x", ["cc", "ct", "xx"]],
        ["X", ["CC", "CT", "XX"]],
        ["z", ["ss", "zh"]],
        ["Z", ["SS", "ZH"]],
    ]),








    d2toX: new Map([
        ["an", ["en",]],
        ["AN", ["EN",]],
        ["au", ["eau", "o", "ô"]],
        ["AU", ["EAU", "O", "Ô"]],
        ["en", ["an",]],







|
|







 







<
<




<
<
<
<
<
<







 







>
>
>
>
>
>
>







23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
...
160
161
162
163
164
165
166


167
168
169
170






171
172
173
174
175
176
177
...
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
        for (let c of sWord) {
            sRes += this._dTransChars.gl_get(c, c);
        }
        return sRes.replace("eau", "o").replace("au", "o");
    },

    aVowel: new Set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ"),
    aConsonant: new Set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ"),
    aDouble: new Set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ"),  // letters that may be used twice successively


    // Similar chars

    d1to1: new Map([
        ["1", "liîLIÎ"],
        ["2", "zZ"],
................................................................................
    d1toX: new Map([
        ["æ", ["ae",]],
        ["Æ", ["AE",]],
        ["b", ["bb",]],
        ["B", ["BB",]],
        ["c", ["cc", "ss", "qu", "ch"]],
        ["C", ["CC", "SS", "QU", "CH"]],


        ["d", ["dd",]],
        ["D", ["DD",]],
        ["é", ["ai", "ei"]],
        ["É", ["AI", "EI"]],






        ["f", ["ff", "ph"]],
        ["F", ["FF", "PH"]],
        ["g", ["gu", "ge", "gg", "gh"]],
        ["G", ["GU", "GE", "GG", "GH"]],
        ["j", ["jj", "dj"]],
        ["J", ["JJ", "DJ"]],
        ["k", ["qu", "ck", "ch", "cu", "kk", "kh"]],
................................................................................
        ["t", ["tt", "th"]],
        ["T", ["TT", "TH"]],
        ["x", ["cc", "ct", "xx"]],
        ["X", ["CC", "CT", "XX"]],
        ["z", ["ss", "zh"]],
        ["Z", ["SS", "ZH"]],
    ]),

    get1toXReplacement: function (cPrev, cCur, cNext) {
        if (this.aConsonant.has(cCur)  &&  (this.aConsonant.has(cPrev)  ||  this.aConsonant.has(cNext))) {
            return [];
        }
        return this.d1toX.gl_get(cCur, []);
    },

    d2toX: new Map([
        ["an", ["en",]],
        ["AN", ["EN",]],
        ["au", ["eau", "o", "ô"]],
        ["AU", ["EAU", "O", "Ô"]],
        ["en", ["an",]],

Modified gc_core/js/ibdawg.js from [ca747a7a44] to [952ba094d6].

304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
                    }
                    // delete char
                    if (nMaxDel > 0) {
                        this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                    }
                }
                // Phonetic replacements
                for (let sRepl of char_player.d1toX.gl_get(cCurrent, [])) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                // Hard replacements
                if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) {







|







304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
                    }
                    // delete char
                    if (nMaxDel > 0) {
                        this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                    }
                }
                // Phonetic replacements
                for (let sRepl of char_player.get1toXReplacement(sNewWord.slice(-1), cCurrent, sRemain.slice(1,2))) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                // Hard replacements
                if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) {

Modified gc_core/py/char_player.py from [b008c1ffec] to [b0152aab01].

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
...
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
...
198
199
200
201
202
203
204







205
206
207
208
209
210
211

def cleanWord (sWord):
    "word simplication before calculating distance between words"
    return sWord.lower().translate(_xTransChars).replace("eau", "o").replace("au", "o")


aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
aConsonant = set("bcdefghjklmnñpqrstvwxzBCDEFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ")  # letter that may be used twice successively


# Similar chars

d1to1 = {
    "1": "liîLIÎ",
    "2": "zZ",
................................................................................
d1toX = {
    "æ": ("ae",),
    "Æ": ("AE",),
    "b": ("bb",),
    "B": ("BB",),
    "c": ("cc", "ss", "qu", "ch"),
    "C": ("CC", "SS", "QU", "CH"),
    "ç": ("ss", "cc", "qh", "ch"),
    "Ç": ("SS", "CC", "QH", "CH"),
    "d": ("dd",),
    "D": ("DD",),
    "é": ("ai", "ei"),
    "É": ("AI", "EI"),
    "è": ("ai", "ei"),
    "È": ("AI", "EI"),
    "ê": ("ai", "ei"),
    "Ê": ("AI", "EI"),
    "ë": ("ai", "ei"),
    "Ë": ("AI", "EI"),
    "f": ("ff", "ph"),
    "F": ("FF", "PH"),
    "g": ("gu", "ge", "gg", "gh"),
    "G": ("GU", "GE", "GG", "GH"),
    "j": ("jj", "dj"),
    "J": ("JJ", "DJ"),
    "k": ("qu", "ck", "ch", "cu", "kk", "kh"),
................................................................................
    "t": ("tt", "th"),
    "T": ("TT", "TH"),
    "x": ("cc", "ct", "xx"),
    "X": ("CC", "CT", "XX"),
    "z": ("ss", "zh"),
    "Z": ("SS", "ZH"),
}








d2toX = {
    "an": ("en",),
    "AN": ("EN",),
    "au": ("eau", "o", "ô"),
    "AU": ("EAU", "O", "Ô"),
    "en": ("an",),







|
|







 







<
<




<
<
<
<
<
<







 







>
>
>
>
>
>
>







16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
...
153
154
155
156
157
158
159


160
161
162
163






164
165
166
167
168
169
170
...
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210

def cleanWord (sWord):
    "word simplication before calculating distance between words"
    return sWord.lower().translate(_xTransChars).replace("eau", "o").replace("au", "o")


aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcçdfjklmnprstzBCÇDFJKLMNPRSTZ")  # letters that may be used twice successively


# Similar chars

d1to1 = {
    "1": "liîLIÎ",
    "2": "zZ",
................................................................................
d1toX = {
    "æ": ("ae",),
    "Æ": ("AE",),
    "b": ("bb",),
    "B": ("BB",),
    "c": ("cc", "ss", "qu", "ch"),
    "C": ("CC", "SS", "QU", "CH"),


    "d": ("dd",),
    "D": ("DD",),
    "é": ("ai", "ei"),
    "É": ("AI", "EI"),






    "f": ("ff", "ph"),
    "F": ("FF", "PH"),
    "g": ("gu", "ge", "gg", "gh"),
    "G": ("GU", "GE", "GG", "GH"),
    "j": ("jj", "dj"),
    "J": ("JJ", "DJ"),
    "k": ("qu", "ck", "ch", "cu", "kk", "kh"),
................................................................................
    "t": ("tt", "th"),
    "T": ("TT", "TH"),
    "x": ("cc", "ct", "xx"),
    "X": ("CC", "CT", "XX"),
    "z": ("ss", "zh"),
    "Z": ("SS", "ZH"),
}


def get1toXReplacement (cPrev, cCur, cNext):
    if cCur in aConsonant  and  (cPrev in aConsonant  or  cNext in aConsonant):
        return ()
    return d1toX.get(cCur, ())


d2toX = {
    "an": ("en",),
    "AN": ("EN",),
    "au": ("eau", "o", "ô"),
    "AU": ("EAU", "O", "Ô"),
    "en": ("an",),

Modified gc_core/py/ibdawg.py from [0203105f4e] to [8ce21115dd].

287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
                    # switching chars
                    if nMaxSwitch:
                        self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "><",True)
                    # delete char
                    if nMaxDel:
                        self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True)
                # Phonetic replacements
                for sRepl in cp.d1toX.get(cCurrent, ()):
                    self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+">"+sRepl, True)
                for sRepl in cp.d2toX.get(sRemain[0:2], ()):
                    self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain[0:2]+">"+sRepl, True)
                # Hard replacements
                if nDeep > 3 and nMaxHardRepl:
                    for cChar, kAddr in self._getCharArcs(iAddr):
                        if cChar not in cp.d1to1.get(cCurrent, ""):







|







287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
                    # switching chars
                    if nMaxSwitch:
                        self._suggest(oSuggResult, sRemain[1:2]+sRemain[0:1]+sRemain[2:], nMaxSwitch-1, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "><",True)
                    # delete char
                    if nMaxDel:
                        self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True)
                # Phonetic replacements
                for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]):
                    self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+">"+sRepl, True)
                for sRepl in cp.d2toX.get(sRemain[0:2], ()):
                    self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain[0:2]+">"+sRepl, True)
                # Hard replacements
                if nDeep > 3 and nMaxHardRepl:
                    for cChar, kAddr in self._getCharArcs(iAddr):
                        if cChar not in cp.d1to1.get(cCurrent, ""):