Grammalecte  Check-in [982c1b5eb0]

Overview
Comment:[graphspell] ibdawg > suggest(): seek first simple combinations
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 982c1b5eb083ff0cc78f2fb608eb7c729543b8f1d696ef0ef6576fbc2c093c57
User & Date: olr on 2021-02-18 08:49:19
Other Links: manifest | tags
Context
2021-02-18
10:26
[fr] remove old useless tests check-in: 15a51e51ca user: olr tags: fr, trunk
08:49
[graphspell] ibdawg > suggest(): seek first simple combinations check-in: 982c1b5eb0 user: olr tags: graphspell, trunk
08:48
[fr] faux positifs check-in: 950c661775 user: olr tags: fr, trunk
Changes

Modified gc_lang/fr/modules/tests_modules.py from [5c8bb6ae99] to [2556c753ce].

    56     56   
    57     57       def test_suggest (self):
    58     58           for sWord in [
    59     59               "déelirranttesss", "vallidasion", "Emilie", "exibission", "ditirembique", "jai", "email",
    60     60               "fatiqué", "coeur", "trèèèèèèèèès", "vraaaaiiiimeeeeennnt", "apele", "Co2",
    61     61               "emmppâiiiller", "testt", "apelaion", "exsepttion", "sintaxik", "ebriete", "ennormmement"
    62     62           ]:
    63         -            for lSugg in self.oSpellChecker.suggest(sWord):
    64         -                self.assertTrue(len(lSugg) > 0)
    65     63               #with timeblock(sWord):
    66         -            #    aSugg = self.oSpellChecker.suggest(sWord)
    67         -            #    print(sWord, "->", " ".join(aSugg))
           64  +            for lSugg in self.oSpellChecker.suggest(sWord):
           65  +                #print(sWord, "->", " ".join(lSugg))
           66  +                self.assertTrue(len(lSugg) > 0)
           67  +
    68     68   
    69     69       def test_lemmas (self):
    70     70           for sWord, sInfi in [
    71     71               ("suis",        "suivre"),
    72     72               ("suis",        "être"),
    73     73               ("a",           "avoir"),
    74     74               ("a",           "a"),

Modified graphspell-js/ibdawg.js from [44a920520f] to [20fbadf805].

    43     43           if (this.aAllSugg.has(sSugg)) {
    44     44               return;
    45     45           }
    46     46           this.aAllSugg.add(sSugg);
    47     47           // jaro 0->1 1 les chaines sont égale
    48     48           let nDistJaro = 1 - str_transform.distanceJaroWinkler(this.sSimplifiedWord, str_transform.simplifyWord(sSugg));
    49     49           let nDist = Math.floor(nDistJaro * 10);
           50  +        if (nDist < this.nMinDist) {
           51  +            this.nMinDist = nDist;
           52  +        }
    50     53           if (nDistJaro < .11) {        // Best suggestions
    51     54               this.dBestSugg.set(sSugg, Math.round(nDistJaro*1000));
    52     55               if (this.dBestSugg.size > this.nBestSuggLimit) {
    53     56                   this.nDistLimit = -1; // make suggest() to end search
    54     57               }
    55     58           } else if (nDistJaro < .33) { // Good suggestions
    56     59               this.dGoodSugg.set(sSugg, Math.round(nDistJaro*1000));
    57     60               if (this.dGoodSugg.size > this.nGoodSuggLimit) {
    58     61                   this.nDistLimit = -1; // make suggest() to end search
    59     62               }
    60         -        } else {
    61         -            if (nDist < this.nMinDist) {
    62         -                this.nMinDist = nDist;
    63         -            }
    64         -            this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist);
    65     63           }
    66         -        if (nDist <= this.nDistLimit) {
    67         -            if (nDist < this.nMinDist) {
    68         -                this.nMinDist = nDist;
    69         -            }
    70         -            this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);
    71         -        }
           64  +        this.nDistLimit = Math.min(this.nDistLimit, this.nMinDist+1);
    72     65       }
    73     66   
    74     67       getSuggestions () {
    75     68           // return a list of suggestions
    76     69           let lRes = [];
    77     70           if (this.dBestSugg.size > 0) {
    78     71               // sort only with simplified words
................................................................................
   340    333           let nMaxJump = Math.max(Math.floor(sWord.length / 4), 1);
   341    334           let oSuggResult = new SuggResult(sWord, nSuggLimit);
   342    335           sWord = str_transform.cleanWord(sWord);
   343    336           if (bSplitTrailingNumbers) {
   344    337               this._splitTrailingNumbers(oSuggResult, sWord);
   345    338           }
   346    339           this._splitSuggest(oSuggResult, sWord);
          340  +        this._suggest(oSuggResult, sWord);
   347    341           this._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump);
   348    342           let aSugg = oSuggResult.getSuggestions();
   349    343           if (this.lexicographer) {
   350    344               aSugg = this.lexicographer.filterSugg(aSugg);
   351    345           }
   352    346           if (sSfx || sPfx) {
   353    347               // we add what we removed

Modified graphspell/ibdawg.py from [13d2327263] to [e27ae4ab79].

    59     59       def addSugg (self, sSugg, nDeep=0):
    60     60           "add a suggestion"
    61     61           if sSugg in self.aAllSugg:
    62     62               return
    63     63           self.aAllSugg.add(sSugg)
    64     64           nDistJaro = 1 - st.distanceJaroWinkler(self.sSimplifiedWord, st.simplifyWord(sSugg))
    65     65           nDist = floor(nDistJaro * 10)
           66  +        if nDist < self.nMinDist:
           67  +            self.nMinDist = nDist
    66     68           #logging.info((nDeep * "  ") + "__" + sSugg + "__ " + str(round(nDistJaro*1000)))
    67     69           if nDistJaro < .11:     # Best suggestions
    68     70               self.dBestSugg[sSugg] = round(nDistJaro*1000)
    69     71               if len(self.dBestSugg) > self.nBestSuggLimit:
    70     72                   self.nDistLimit = -1  # make suggest() to end search
    71     73           elif nDistJaro < .33:   # Good suggestions
    72     74               self.dGoodSugg[sSugg] = round(nDistJaro*1000)
    73     75               if len(self.dGoodSugg) > self.nGoodSuggLimit:
    74     76                   self.nDistLimit = -1  # make suggest() to end search
    75         -        else:
    76         -            if nDist < self.nMinDist:
    77         -                self.nMinDist = nDist
    78         -            self.nDistLimit = min(self.nDistLimit, self.nMinDist)
    79         -        if nDist <= self.nDistLimit:
    80         -            if nDist < self.nMinDist:
    81         -                self.nMinDist = nDist
    82         -            self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)
           77  +        self.nDistLimit = min(self.nDistLimit, self.nMinDist+1)
    83     78   
    84     79       def getSuggestions (self):
    85     80           "return a list of suggestions"
    86     81           # we sort the better results with the original word
    87     82           lRes = []
    88     83           if len(self.dBestSugg) > 0:
    89     84               # sort only with simplified words
................................................................................
   242    237           nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
   243    238           nMaxJump = max(len(sWord) // 4, 1)
   244    239           oSuggResult = SuggResult(sWord, nSuggLimit)
   245    240           sWord = st.cleanWord(sWord)
   246    241           if bSplitTrailingNumbers:
   247    242               self._splitTrailingNumbers(oSuggResult, sWord)
   248    243           self._splitSuggest(oSuggResult, sWord)
          244  +        self._suggest(oSuggResult, sWord)
   249    245           self._suggest(oSuggResult, sWord, nMaxSwitch, nMaxDel, nMaxHardRepl, nMaxJump)
   250    246           aSugg = oSuggResult.getSuggestions()
   251    247           if self.lexicographer:
   252    248               aSugg = self.lexicographer.filterSugg(aSugg)
   253    249           if sSfx or sPfx:
   254    250               # we add what we removed
   255    251               return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))