Grammalecte  Check-in [d22466bd67]

Overview
Comment:[core] sort first range of suggestions + code clarification
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | spellsugg
Files: files | file ages | folders
SHA3-256: d22466bd67f2df09b8e8d933ea100cc4a7dfa5e46f26c6396581e6a3fecb21ae
User & Date: olr on 2017-11-07 19:28:50
Other Links: branch diff | manifest | tags
Context
2017-11-07
19:56
[core] ibdawg: suggest2 > char priority check-in: 8ea89d19b5 user: olr tags: core, spellsugg
19:28
[core] sort first range of suggestions + code clarification check-in: d22466bd67 user: olr tags: core, spellsugg
18:25
[core][bug] ibdawg: avoid storing several times the same suggestion check-in: 64ccfa7e38 user: olr tags: core, spellsugg
Changes

Modified gc_core/py/ibdawg.py from [2152c0fca3] to [69a64ae917].

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69


70
71
72
73
74
75
76
...
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
...
319
320
321
322
323
324
325

326
327
328
329
330
331
332
...
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
...
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
        self.nDistLimit = nDistLimit  if nDistLimit >= 0  else  (len(sWord) // 3) + 1
        self.nMinDist = 1000
        self.aSugg = set()
        self.dSugg = { 0: [],  1: [] }

    def addSugg (self, sSugg, nDeep=0):
        """store <sSugg> in the bucket matching its distance to the cleaned word

        The suggestion is ignored when it is already stored or when its
        Damerau-Levenshtein distance (computed on cleaned forms) exceeds
        <self.nDistLimit>. <nDeep> is not used by the active code.
        """
        if sSugg in self.aSugg:
            # already stored
            return
        nDist = st.distanceDamerauLevenshtein(self.sCleanWord, cp.cleanWord(sSugg))
        if nDist > self.nDistLimit:
            # too far from the word
            return
        self.dSugg.setdefault(nDist, []).append(sSugg)
        self.aSugg.add(sSugg)
        # keep track of the best distance seen and tighten the accepted limit accordingly
        self.nMinDist = min(self.nMinDist, nDist)
        self.nDistLimit = min(self.nDistLimit, self.nMinDist+2)

    def getSuggestions (self, nSuggLimit=10, nDistLimit=-1):
        """return a list of at most <nSuggLimit> suggestions, closest distances first

        <nDistLimit> is accepted for interface compatibility but is not used here.
        Suggestions are title-cased when <self.sWord> is title-cased.
        """
        lRes = []
        # buckets are created on demand by addSugg(), so the insertion order of <self.dSugg>
        # is not the distance order: walk the keys sorted to take the closest suggestions first
        for nDist in sorted(self.dSugg):
            lRes.extend(self.dSugg[nDist])
            if len(lRes) > nSuggLimit:
                # enough candidates gathered
                break
        lRes = list(cp.filterSugg(lRes))
        if self.sWord.istitle():
            lRes = [ sSugg.title()  for sSugg in lRes ]
        return lRes[:nSuggLimit]

    def reset (self):
        "empty the stored suggestions (both containers are cleared in place, not replaced)"
        for xStore in (self.aSugg, self.dSugg):
            xStore.clear()


................................................................................
        return aSugg

    def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False):
        # recursive function
        #logging.info((nDeep * "  ") + sNewWord + ":" + sRemain + " · " + sAction)
        if not sRemain:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
                #logging.info((nDeep * "  ") + "__" + sNewWord + "__")
                oSuggResult.addSugg(sNewWord)
            for sTail in self._getTails(iAddr):
                #logging.info((nDeep * "  ") + "__" + sNewWord+sTail + "__")
                oSuggResult.addSugg(sNewWord+sTail)
            return
        cCurrent = sRemain[0:1]
        for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr):
            self._suggest(oSuggResult, sRemain[1:], nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar, "*")
        if not bAvoidLoop: # avoid infinite loop
            if cCurrent == sRemain[1:2]:
                # same char, we remove 1 char without adding 1 to <sNewWord>
                self._suggest(oSuggResult, sRemain[1:], nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+"/2")
            else:
                # switching chars
................................................................................
        aSugg = oSuggResult.getSuggestions()
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg

    def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""):

        #logging.info((nDeep * "  ") + sNewWord)
        if nDeep >= oSuggResult.nDistLimit:
            sCleanNewWord = cp.cleanWord(sNewWord)
            if st.distanceSift4(oSuggResult.sCleanWord[:len(sCleanNewWord)], sCleanNewWord) > oSuggResult.nDistLimit:
                return
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            #logging.info((nDeep * "  ") + "__" + sNewWord + "__")
................................................................................

    def _getCharArcs (self, iAddr):
        """generator: yield (char, address) for the arcs of the node at address <iAddr>

        Only arcs whose value is below <self.nChar> are yielded; the value is
        converted to its character through <self.dCharVal>.
        """
        for nArcVal, jNext in self._getArcs(iAddr):
            if nArcVal >= self.nChar:
                # arc value outside the character range
                continue
            yield (self.dCharVal[nArcVal], jNext)

    def _getSimilarArcs (self, cChar, iAddr):
        """generator: yield (char, address) for each char similar to <cChar> leaving node <iAddr>

        Candidates come from <cp.d1to1> (falling back on <cChar> alone); a candidate is
        yielded only if it is known in <self.dChar> and the arc lookup returns a truthy address.
        """
        for cAlt in cp.d1to1.get(cChar, [cChar]):
            if cAlt not in self.dChar:
                continue
            jNext = self._lookupArcNode(self.dChar[cAlt], iAddr)
            if jNext:
                yield (cAlt, jNext)

................................................................................
                    aTails.add(sTail + self.dCharVal[nVal])
                if n and not aTails:
                    aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
        return aTails

    def drawPath (self, sWord, iAddr=0):
        """print the path taken by <sWord> in the graph, one node per recursion step

        For the node at <iAddr>, prints the first char of <sWord> (a space if <sWord> is
        empty) followed by the chars of the arcs known in <self.dCharVal>, then a "|" marker
        under the arc that matches and recurses on the rest of the word from that arc.
        """
        cFirst = sWord[0:1]
        print((cFirst or " ") + ": ", end="")
        iMatch = -1
        iMatchAddr = None
        nShown = 0
        for nVal, jAddr in self._getArcs(iAddr):
            if nVal not in self.dCharVal:
                continue
            cArc = self.dCharVal[nVal]
            print(cArc, end="")
            if cArc == cFirst:
                # remember the column and the target of the arc to follow
                iMatchAddr = jAddr
                iMatch = nShown
            nShown += 1
        if sWord and iMatch >= 0:
            print("\n   "+ " " * iMatch + "|")
            self.drawPath(sWord[1:], iMatchAddr)

    def select (self, sPattern=""):







|







<







|
|
|







>
>







 







<


<



|







 







>







 







|







 







|


|
|
|
<
|
|
|
|







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
...
266
267
268
269
270
271
272

273
274

275
276
277
278
279
280
281
282
283
284
285
...
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
...
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
...
358
359
360
361
362
363
364
365
366
367
368
369
370

371
372
373
374
375
376
377
378
379
380
381
        self.nDistLimit = nDistLimit  if nDistLimit >= 0  else  (len(sWord) // 3) + 1
        self.nMinDist = 1000
        self.aSugg = set()
        self.dSugg = { 0: [],  1: [] }

    def addSugg (self, sSugg, nDeep=0):
        """store <sSugg> in the bucket matching its distance to the cleaned word

        The suggestion is ignored when it is already stored or when its
        Damerau-Levenshtein distance (computed on cleaned forms) exceeds
        <self.nDistLimit>. <nDeep> is not used by the active code.
        """
        if sSugg in self.aSugg:
            # already stored
            return
        nDist = st.distanceDamerauLevenshtein(self.sCleanWord, cp.cleanWord(sSugg))
        if nDist > self.nDistLimit:
            # too far from the word
            return
        self.dSugg.setdefault(nDist, []).append(sSugg)
        self.aSugg.add(sSugg)
        # keep track of the best distance seen and tighten the accepted limit accordingly
        self.nMinDist = min(self.nMinDist, nDist)
        self.nDistLimit = min(self.nDistLimit, self.nMinDist+2)

    def getSuggestions (self, nSuggLimit=10, nDistLimit=-1):
        """return a list of at most <nSuggLimit> suggestions, closest distances first

        <nDistLimit> is accepted for interface compatibility but is not used here.
        Suggestions are title-cased when <self.sWord> is title-cased, upper-cased when
        it is upper-cased.
        """
        # bucket 0 may be missing (the dictionary can be emptied by reset()): do not index it blindly
        lBest = self.dSugg.get(0)
        if lBest:
            # suggestions equal to the word once cleaned: rank them against the original word
            lBest.sort(key=lambda sSugg: st.distanceDamerauLevenshtein(self.sWord, sSugg))
        lRes = []
        # buckets are created on demand by addSugg(), so the insertion order of <self.dSugg>
        # is not the distance order: walk the keys sorted to take the closest suggestions first
        for nDist in sorted(self.dSugg):
            lRes.extend(self.dSugg[nDist])
            if len(lRes) > nSuggLimit:
                # enough candidates gathered
                break
        lRes = list(cp.filterSugg(lRes))
        if self.sWord.istitle():
            lRes = [ sSugg.title()  for sSugg in lRes ]
        elif self.sWord.isupper():
            lRes = [ sSugg.upper()  for sSugg in lRes ]
        return lRes[:nSuggLimit]

    def reset (self):
        """forget every stored suggestion

        Both containers are emptied in place, then the distance buckets 0 and 1 are
        recreated as the constructor does, so that code indexing <self.dSugg[0]>
        does not fail with a KeyError after a reset.
        """
        self.aSugg.clear()
        self.dSugg.clear()
        self.dSugg.update({ 0: [],  1: [] })
        # NOTE(review): nMinDist and nDistLimit keep the values reached before the reset — confirm this is intended


................................................................................
        return aSugg

    def _suggest (self, oSuggResult, sRemain, nMaxDel=0, nMaxHardRepl=0, nDeep=0, iAddr=0, sNewWord="", sAction="", bAvoidLoop=False):
        # recursive function
        #logging.info((nDeep * "  ") + sNewWord + ":" + sRemain + " · " + sAction)
        if not sRemain:
            if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:

                oSuggResult.addSugg(sNewWord)
            for sTail in self._getTails(iAddr):

                oSuggResult.addSugg(sNewWord+sTail)
            return
        cCurrent = sRemain[0:1]
        for cChar, jAddr in self._getSimilarCharArcs(cCurrent, iAddr):
            self._suggest(oSuggResult, sRemain[1:], nMaxDel, nMaxHardRepl, nDeep+1, jAddr, sNewWord+cChar, "*")
        if not bAvoidLoop: # avoid infinite loop
            if cCurrent == sRemain[1:2]:
                # same char, we remove 1 char without adding 1 to <sNewWord>
                self._suggest(oSuggResult, sRemain[1:], nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, cCurrent+"/2")
            else:
                # switching chars
................................................................................
        aSugg = oSuggResult.getSuggestions()
        if sSfx or sPfx:
            # we add what we removed
            return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
        return aSugg

    def _suggest2 (self, oSuggResult, nDeep=0, iAddr=0, sNewWord=""):
        # recursive function
        #logging.info((nDeep * "  ") + sNewWord)
        if nDeep >= oSuggResult.nDistLimit:
            sCleanNewWord = cp.cleanWord(sNewWord)
            if st.distanceSift4(oSuggResult.sCleanWord[:len(sCleanNewWord)], sCleanNewWord) > oSuggResult.nDistLimit:
                return
        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
            #logging.info((nDeep * "  ") + "__" + sNewWord + "__")
................................................................................

    def _getCharArcs (self, iAddr):
        """generator: yield (char, address) for the arcs of the node at address <iAddr>

        Only arcs whose value is below <self.nChar> are yielded; the value is
        converted to its character through <self.dCharVal>.
        """
        for nArcVal, jNext in self._getArcs(iAddr):
            if nArcVal >= self.nChar:
                # arc value outside the character range
                continue
            yield (self.dCharVal[nArcVal], jNext)

    def _getSimilarCharArcs (self, cChar, iAddr):
        """generator: yield (char, address) for each char similar to <cChar> leaving node <iAddr>

        Candidates come from <cp.d1to1> (falling back on <cChar> alone); a candidate is
        yielded only if it is known in <self.dChar> and the arc lookup returns a truthy address.
        """
        for cAlt in cp.d1to1.get(cChar, [cChar]):
            if cAlt not in self.dChar:
                continue
            jNext = self._lookupArcNode(self.dChar[cAlt], iAddr)
            if jNext:
                yield (cAlt, jNext)

................................................................................
                    aTails.add(sTail + self.dCharVal[nVal])
                if n and not aTails:
                    aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
        return aTails

    def drawPath (self, sWord, iAddr=0):
        """print the path taken by <sWord> in the graph, one node per recursion step

        For the node at <iAddr>, prints the first char of <sWord> (a space if <sWord> is
        empty) followed by the chars yielded by _getCharArcs(), then a "|" marker under
        the arc that matches and recurses on the rest of the word from that arc.
        """
        cFirst = sWord[0:1]
        print((cFirst or " ") + ": ", end="")
        iMatch = -1
        iMatchAddr = None
        for i, (cArc, jAddr) in enumerate(self._getCharArcs(iAddr)):
            print(cArc, end="")
            if cArc == cFirst:
                # remember the column and the target of the arc to follow
                iMatchAddr = jAddr
                iMatch = i
        if sWord and iMatch >= 0:
            print("\n   "+ " " * iMatch + "|")
            self.drawPath(sWord[1:], iMatchAddr)

    def select (self, sPattern=""):