Grammalecte  Check-in [6c5050fe91]

Overview
Comment:[core] better suggestion engine
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | core | spellsugg
Files: files | file ages | folders
SHA3-256: 6c5050fe91cccc88d55b0c9ef8a6bb7ab6c590062387638a2f857f97b581e21d
User & Date: olr on 2017-11-21 16:57:45
Other Links: branch diff | manifest | tags
Context
2017-11-21
16:57
[core] better suggestion engine Closed-Leaf check-in: 6c5050fe91 user: olr tags: core, spellsugg
2017-11-09
11:56
[core] ibdawg: suggestion mechanism > reduce 1toX replacements overload (much, much faster) check-in: 767e396f2d user: olr tags: core, spellsugg
Changes

Modified gc_core/js/ibdawg.js from [952ba094d6] to [b7bc0fa0c9].

284
285
286
287
288
289
290
291

292
293







294
295
296
297
298
299
300
...
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
            }
            for (let sTail of this._getTails(iAddr)) {
                oSuggResult.addSugg(sNewWord+sTail);
            }
            return;
        }
        let cCurrent = sRemain.slice(0, 1);
        for (let [cChar, jAddr] of this._getSimilarCharArcs(cCurrent, iAddr)) {

            this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar);
        }







        if (!bAvoidLoop) { // avoid infinite loop
            if (sRemain.length > 1) {
                if (cCurrent == sRemain.slice(1, 2)) {
                    // same char, we remove 1 char without adding 1 to <sNewWord>
                    this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord);
                }
                else {
................................................................................
                for (let sRepl of char_player.get1toXReplacement(sNewWord.slice(-1), cCurrent, sRemain.slice(1,2))) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                // Hard replacements
                if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) {
                    for (let [cChar, kAddr] of this._getCharArcs(iAddr)) {
                        if (!char_player.d1to1.gl_get(cCurrent, "").includes(cChar)) {
                            this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, kAddr, sNewWord+cChar, true);
                        }
                    }
                }
            }
            // end of word
            if (sRemain.length == 2) {
                for (let sRepl of char_player.dFinal2.gl_get(sRemain, [])) {
                    this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
            }







|
>
|
|
>
>
>
>
>
>
>







 







|

|



|







284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
...
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
            }
            for (let sTail of this._getTails(iAddr)) {
                oSuggResult.addSugg(sNewWord+sTail);
            }
            return;
        }
        let cCurrent = sRemain.slice(0, 1);
        for (let [cChar, jAddr] of this._getCharArcs(iAddr)) {
            if (char_player.d1to1.gl_get(cCurrent, [cCurrent]).includes(cChar)) {
                this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar);
            }
            else if (!bAvoidLoop && nMaxHardRepl) {
                this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, true);
            }
        }
        /*for (let [cChar, jAddr] of this._getSimilarCharArcs(cCurrent, iAddr)) {
            this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar);
        }*/
        if (!bAvoidLoop) { // avoid infinite loop
            if (sRemain.length > 1) {
                if (cCurrent == sRemain.slice(1, 2)) {
                    // same char, we remove 1 char without adding 1 to <sNewWord>
                    this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord);
                }
                else {
................................................................................
                for (let sRepl of char_player.get1toXReplacement(sNewWord.slice(-1), cCurrent, sRemain.slice(1,2))) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                for (let sRepl of char_player.d2toX.gl_get(sRemain.slice(0, 2), [])) {
                    this._suggest(oSuggResult, sRepl + sRemain.slice(2), nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
                // Hard replacements
                /*if (nDeep > 3 && nMaxHardRepl && sRemain.length >= 2) {
                    for (let [cChar, kAddr] of this._getCharArcs(iAddr)) {
                        if (!char_player.d1to1.gl_get(cCurrent, [cCurrent]).includes(cChar)) {
                            this._suggest(oSuggResult, sRemain.slice(1), nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, kAddr, sNewWord+cChar, true);
                        }
                    }
                }*/
            }
            // end of word
            if (sRemain.length == 2) {
                for (let sRepl of char_player.dFinal2.gl_get(sRemain, [])) {
                    this._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, true);
                }
            }

Modified gc_core/py/ibdawg.py from [8ce21115dd] to [dcdab2db72].

272
273
274
275
276
277
278





279
280
281
282
283
284
285
286
287
...
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
        if not sRemain:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                oSuggResult.addSugg(sNewWord, nDeep)
            for sTail in self._getTails(iAddr):
                oSuggResult.addSugg(sNewWord+sTail, nDeep)
            return
        cCurrent = sRemain[0:1]





        for cChar, jAddr in self._getSimilarCharArcs(cCurrent, iAddr):
            self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar, "*")
        if not bAvoidLoop: # avoid infinite loop
            if len(sRemain) > 1:
                if cCurrent == sRemain[1:2]:
                    # same char, we remove 1 char without adding 1 to <sNewWord>
                    self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+"/2")
                else:
                    # switching chars
................................................................................
                        self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True)
                # Phonetic replacements
                for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]):
                    self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+">"+sRepl, True)
                for sRepl in cp.d2toX.get(sRemain[0:2], ()):
                    self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain[0:2]+">"+sRepl, True)
                # Hard replacements
                if nDeep > 3 and nMaxHardRepl:
                    for cChar, kAddr in self._getCharArcs(iAddr):
                        if cChar not in cp.d1to1.get(cCurrent, ""):
                            self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, kAddr, sNewWord+cChar, "[["+cChar+"]]", True)
            # end of word
            if len(sRemain) == 2:
                for sRepl in cp.dFinal2.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " >> " + sRepl, True)
            elif len(sRemain) == 1:
                self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " [last char removed] ", True) # remove last char and go on
                for sRepl in cp.dFinal1.get(sRemain, ()):







>
>
>
>
>
|
|







 







|
|
|
|







272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
...
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
        if not sRemain:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                oSuggResult.addSugg(sNewWord, nDeep)
            for sTail in self._getTails(iAddr):
                oSuggResult.addSugg(sNewWord+sTail, nDeep)
            return
        cCurrent = sRemain[0:1]
        for cChar, jAddr in self._getCharArcs(iAddr):
            if cChar in cp.d1to1.get(cCurrent, cCurrent):
                self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar, "*")
            elif not bAvoidLoop and nMaxHardRepl:
                self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, jAddr, sNewWord+cChar, "[["+cChar+"]]", True)
        #for cChar, jAddr in self._getSimilarCharArcs(cCurrent, iAddr):
        #    self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar, "*")
        if not bAvoidLoop: # avoid infinite loop
            if len(sRemain) > 1:
                if cCurrent == sRemain[1:2]:
                    # same char, we remove 1 char without adding 1 to <sNewWord>
                    self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+"/2")
                else:
                    # switching chars
................................................................................
                        self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel-1, nMaxHardRepl, nDeep+1, iAddr, sNewWord, "-"+cCurrent, True)
                # Phonetic replacements
                for sRepl in cp.get1toXReplacement(sNewWord[-1:], cCurrent, sRemain[1:2]):
                    self._suggest(oSuggResult, sRepl + sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+">"+sRepl, True)
                for sRepl in cp.d2toX.get(sRemain[0:2], ()):
                    self._suggest(oSuggResult, sRepl + sRemain[2:], nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain[0:2]+">"+sRepl, True)
                # Hard replacements
                #if nDeep > 3 and nMaxHardRepl:
                #    for cChar, kAddr in self._getCharArcs(iAddr):
                #        if cChar not in cp.d1to1.get(cCurrent, ""):
                #            self._suggest(oSuggResult, sRemain[1:], nMaxSwitch, nMaxDel, nMaxHardRepl-1, nDeep+1, kAddr, sNewWord+cChar, "[["+cChar+"]]", True)
            # end of word
            if len(sRemain) == 2:
                for sRepl in cp.dFinal2.get(sRemain, ()):
                    self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " >> " + sRepl, True)
            elif len(sRemain) == 1:
                self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, sRemain + " [last char removed] ", True) # remove last char and go on
                for sRepl in cp.dFinal1.get(sRemain, ()):

Modified gc_lang/fr/config.ini from [c76d410d86] to [bd9c32e343].

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
oxt_identifier = French.linguistic.resources.from.Dicollecte.by.OlivierR

# Firefox
fx_identifier = French-GC@grammalecte.net
fx_name = Grammalecte [fr]

win_fx_dev_path = C:\Program Files\Firefox Developer Edition\firefox.exe
win_fx_nightly_path = C:\Program Files (x86)\Nightly\firefox.exe
linux_fx_dev_path = /usr/bin/firefox
linux_fx_nightly_path = /usr/bin/firefox


# Thunderbird
tb_identifier = French-GC-TB@grammalecte.net
tb_name = Grammalecte [fr]







|







29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
oxt_identifier = French.linguistic.resources.from.Dicollecte.by.OlivierR

# Firefox
fx_identifier = French-GC@grammalecte.net
fx_name = Grammalecte [fr]

win_fx_dev_path = C:\Program Files\Firefox Developer Edition\firefox.exe
win_fx_nightly_path = C:\Program Files\Nightly\firefox.exe
linux_fx_dev_path = /usr/bin/firefox
linux_fx_nightly_path = /usr/bin/firefox


# Thunderbird
tb_identifier = French-GC-TB@grammalecte.net
tb_name = Grammalecte [fr]