Grammalecte  Check-in [5cd4863db3]

Overview
Comment: [core] ibdawg: suggestion mechanism update
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: 5cd4863db37b6c2eb8385e1c0f8567f7cc6ad24d976b007dec5e40e4ce13cc2d
User & Date: olr on 2017-06-26 08:29:50
Other Links: manifest | tags
Context
2017-06-26
08:45
[fr] + 2 tests check-in: 0ff766161c user: olr tags: fr, trunk
08:29
[core] ibdawg: suggestion mechanism update check-in: 5cd4863db3 user: olr tags: core, trunk
07:36
[core] ibdawg: variable renamed check-in: 6d320f3f8d user: olr tags: core, trunk
Changes

Modified gc_core/py/char_player.py from [b5981aec34] to [14cba73827].

    23     23       return sWord.translate(_CHARMAP)
    24     24   
    25     25   
    26     26   # Similar chars
    27     27   
    28     28   d1to1 = {
    29     29       "1": "li",
    30         -    "2": "e",
           30  +    "2": "z",
    31     31       "3": "e",
    32     32       "4": "aà",
    33     33       "5": "ge",
    34     34       "6": "bd",
    35     35       "7": "lt",
    36     36       "8": "b",
    37     37       "9": "gbd",
           38  +    "0": "o",
    38     39   
    39     40       "a": "aàâáäæ",
    40     41       "à": "aàâáäæ",
    41     42       "â": "aàâáäæ",
    42     43       "á": "aàâáäæ",
    43     44       "ä": "aàâáäæ",
    44     45   
................................................................................
   135    136       "z": ("ss", "zh")
   136    137   }
   137    138   
   138    139   d2toX = {
   139    140       "an": ("en",),
   140    141       "en": ("an",),
   141    142       "ai": ("ei", "é", "è", "ê", "ë"),
   142         -    "ei": ("ai", "ait", "ais", "é", "è", "ê", "ë"),
          143  +    "ei": ("ai", "é", "è", "ê", "ë"),
   143    144       "ch": ("sh", "c", "ss"),
   144    145       "ct": ("x", "cc"),
   145    146       "oa": ("oi",),
   146    147       "oi": ("oa", "oie"),
   147    148       "qu": ("q", "cq", "ck", "c", "k"),
          149  +    "ss": ("c", "ç"),
   148    150   }
   149    151   
   150    152   
   151    153   # End of word
   152    154   
   153    155   dFinal1 = {
   154    156       "a": ("as", "at", "ant"),

Modified gc_core/py/ibdawg.py from [9784b032d7] to [303901f49a].

   150    150           if "’" in sWord: # ugly hack
   151    151               sWord = sWord.replace("’", "'")
   152    152           if self.lookup(sWord):
   153    153               return True
   154    154           if sWord[0:1].isupper():
   155    155               if len(sWord) > 1:
   156    156                   if sWord.istitle():
   157         -                    return bool(self.lookup(sWord.lower()))
          157  +                    return self.lookup(sWord.lower())
   158    158                   if sWord.isupper():
   159    159                       if self.bOptNumSigle:
   160    160                           return True
   161         -                    return bool(self.lookup(sWord.lower()) or self.lookup(sWord.capitalize()))
   162         -                return bool(self.lookup(sWord[:1].lower() + sWord[1:]))
          161  +                    return self.lookup(sWord.lower()) or self.lookup(sWord.capitalize())
          162  +                return self.lookup(sWord[:1].lower() + sWord[1:])
   163    163               else:
   164         -                return bool(self.lookup(sWord.lower()))
          164  +                return self.lookup(sWord.lower())
   165    165           return False
   166    166   
   167    167       def lookup (self, sWord):
   168    168           "returns True if <sWord> in dictionary (strict verification)"
   169    169           iAddr = 0
   170    170           for c in sWord:
   171    171               if c not in self.dChar:
   172    172                   return False
   173    173               iAddr = self._lookupArcNode(self.dChar[c], iAddr)
   174    174               if iAddr == None:
   175    175                   return False
   176         -        return int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask
          176  +        return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)
   177    177   
   178    178       def suggest (self, sWord):
   179    179           "returns a set of similar words"
   180    180           # first, we check for similar words
   181    181           #return set(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
   182    182           lSugg = self._suggest(sWord)
   183    183           if not lSugg:
................................................................................
   188    188                   lSugg.extend(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
   189    189           return set(lSugg)
   190    190   
   191    191       def _suggest (self, sWord, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
   192    192           # RECURSIVE FUNCTION
   193    193           if not sWord:
   194    194               if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
   195         -                show(nDeep, "!!! " + sNewWord + " !!!")
          195  +                show(nDeep, "___" + sNewWord + "___")
   196    196                   return [sNewWord]
   197    197               return []
   198    198           #show(nDeep, "<" + sWord + ">  ===>  " + sNewWord)
   199    199           lSugg = []
   200    200           cCurrent = sWord[0:1]
   201    201           for cChar, jAddr in self._getSimilarArcs(cCurrent, iAddr):
   202    202               #show(nDeep, cChar)
................................................................................
   205    205               #show(nDeep, ":no loop:")
   206    206               if cCurrent == sWord[1:2]:
   207    207                   # same char, we remove 1 char without adding 1 to <sNewWord>
   208    208                   lSugg.extend(self._suggest(sWord[1:], nDeep+1, iAddr, sNewWord))
   209    209               for sRepl in cp.d1toX.get(cCurrent, ()):
   210    210                   #show(nDeep, sRepl)
   211    211                   lSugg.extend(self._suggest(sRepl + sWord[1:], nDeep+1, iAddr, sNewWord, True))
          212  +            for sRepl in cp.d2toX.get(sWord[0:2], ()):
          213  +                #show(nDeep, sRepl)
          214  +                lSugg.extend(self._suggest(sRepl + sWord[2:], nDeep+1, iAddr, sNewWord, True))
   212    215               if len(sWord) == 2:
   213    216                   for sRepl in cp.dFinal2.get(sWord, ()):
   214    217                       #show(nDeep, sRepl)
   215    218                       lSugg.extend(self._suggest(sRepl, nDeep+1, iAddr, sNewWord, True))
   216    219               elif len(sWord) == 1:
   217    220                   #show(nDeep, ":end of word:")
   218    221                   # end of word