Grammalecte  Check-in [622060334c]

Overview
Comment:[graphspell] suggestions: split word at apostrophes and check each part
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 622060334cf37de6c5957200183b914da68c5b804a91d1eedb175f7fe8d01e9c
User & Date: olr on 2018-11-23 15:33:45
Other Links: manifest | tags
Context
2018-11-23
16:05
[graphspell] char_player: another simplification method for sound “é” to avoid oversimplification check-in: 93a0b84b63 user: olr tags: graphspell, trunk
15:33
[graphspell] suggestions: split word at apostrophes and check each part check-in: 622060334c user: olr tags: graphspell, trunk
15:32
[fr] ajustements check-in: def3f4276e user: olr tags: fr, trunk
Changes

Modified graphspell-js/ibdawg.js from [edb211270b] to [87959bb3f3].

   328    328           let sSfx = "";
   329    329           [sPfx, sWord, sSfx] = char_player.cut(sWord);
   330    330           let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1);
   331    331           let nMaxDel = Math.floor(sWord.length / 5);
   332    332           let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1);
   333    333           let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
   334    334           let oSuggResult = new SuggResult(sWord);
          335  +        this._splitSuggest(oSuggResult, sWord);
   335    336           this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
   336    337           let aSugg = oSuggResult.getSuggestions(nSuggLimit);
   337    338           if (sSfx || sPfx) {
   338    339               // we add what we removed
   339    340               return aSugg.map( (sSugg) => { return sPfx + sSugg + sSfx; } );
   340    341           }
   341    342           //console.timeEnd("Suggestions for " + sWord);
   342    343           return aSugg;
   343    344       }
          345  +
          346  +    _splitSuggest (oSuggResult, sWord) {
          347  +        for (let cSplitter of "'’") {
          348  +            if (sWord.includes(cSplitter)) {
          349  +                let [sWord1, sWord2] = sWord.split(cSplitter, 2);
          350  +                if (this.isValid(sWord1) && this.isValid(sWord2)) {
          351  +                    oSuggResult.addSugg(sWord1+" "+sWord2);
          352  +                }
          353  +            }
          354  +        }
          355  +    }
   344    356   
   345    357       _suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
   346    358           // returns a set of suggestions
   347    359           // recursive function
   348    360           if (sRemain == "") {
   349    361               if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
   350    362                   oSuggResult.addSugg(sNewWord);

Modified graphspell/ibdawg.py from [59273fbe60] to [630526bab1].

   299    299           sWord = cp.spellingNormalization(sWord)
   300    300           sPfx, sWord, sSfx = cp.cut(sWord)
   301    301           nMaxSwitch = max(len(sWord) // 3, 1)
   302    302           nMaxDel = len(sWord) // 5
   303    303           nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
   304    304           nMaxJump = max(len(sWord) // 4, 1)
   305    305           oSuggResult = SuggResult(sWord)
          306  +        self._splitSuggest(oSuggResult, sWord)
   306    307           self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
   307    308           aSugg = oSuggResult.getSuggestions(nSuggLimit)
   308    309           if sSfx or sPfx:
   309    310               # we add what we removed
   310    311               return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
   311    312           return aSugg
   312    313   
          314  +    def _splitSuggest (self, oSuggResult, sWord):
          315  +        for cSplitter in "'’":
          316  +            if cSplitter in sWord:
          317  +                sWord1, sWord2 = sWord.split(cSplitter, 1)
          318  +                if self.isValid(sWord1) and self.isValid(sWord2):
          319  +                    oSuggResult.addSugg(sWord1+" "+sWord2)
          320  +
   313    321       def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
   314    322           # recursive function
   315    323           #logging.info((nDeep * "  ") + sNewWord + ":" + sRemain)
   316    324           if not sRemain:
   317    325               if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
   318    326                   oSuggResult.addSugg(sNewWord, nDeep)
   319    327               for sTail in self._getTails(iAddr):