Grammalecte  Check-in [c5c926760b]

Overview
Comment: [graphspell] end of lemma is now a slash instead of a space
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | graphspell | rg
Files: files | file ages | folders
SHA3-256: c5c926760b88e94d5a1ea9b32aad40e78b329aa9b44308191a90a557bdde39fb
User & Date: olr on 2018-06-06 09:54:07
Other Links: branch diff | manifest | tags
Context
2018-06-06
10:35
[fr] end of lemma is now a slash instead of a space check-in: 3bd7a19c94 user: olr tags: fr, rg
09:54
[graphspell] end of lemma is now a slash instead of a space check-in: c5c926760b user: olr tags: graphspell, rg
09:30
[core] darg: merge morph and morphex functions check-in: 4134a01a49 user: olr tags: core, rg
Changes

Modified graphspell-js/ibdawg.js from [241ce099fe] to [068f06a16d].

   510    510                       let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]);
   511    511                       // Now , we go to the next node and retrieve all following arcs values, all of them are tags
   512    512                       let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
   513    513                       let nRawArc2 = 0;
   514    514                       while (!(nRawArc2 & this._lastArcMask)) {
   515    515                           let iEndArcAddr2 = iAddr2 + this.nBytesArc;
   516    516                           nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2));
   517         -                        l.push(sStem + " " + this.lArcVal[nRawArc2 & this._arcMask]);
          517  +                        l.push(sStem + "/" + this.lArcVal[nRawArc2 & this._arcMask]);
   518    518                           iAddr2 = iEndArcAddr2+this.nBytesNodeAddress;
   519    519                       }
   520    520                   }
   521    521                   iAddr = iEndArcAddr + this.nBytesNodeAddress;
   522    522               }
   523    523               return l;
   524    524           }

Modified graphspell-js/spellchecker.js from [a6bdb52bd3] to [5b9ccbbb56].

   236    236               lMorph.push(...this.oCommunityDic.getMorph(sWord));
   237    237           }
   238    238           if (this.bPersonalDic) {
   239    239               lMorph.push(...this.oPersonalDic.getMorph(sWord));
   240    240           }
   241    241           if (this.bStorage) {
   242    242               this._dMorphologies.set(sWord, lMorph);
   243         -            this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf(" ")); }))));
          243  +            this._dLemmas.set(sWord, Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); }))));
   244    244               //console.log(sWord, this._dLemmas.get(sWord));
   245    245           }
   246    246           return lMorph;
   247    247       }
   248    248   
   249    249       getLemma (sWord) {
   250    250           // retrieves lemmas
   251    251           if (this.bStorage) {
   252    252               if (!this._dLemmas.has(sWord)) {
   253    253                   this.getMorph(sWord);
   254    254               }
   255    255               return this._dLemmas.get(sWord);
   256    256           }
   257         -        return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf(" ")); })));
          257  +        return Array.from(new Set(this.getMorph(sWord).map((sMorph) => { return sMorph.slice(1, sMorph.indexOf("/")); })));
   258    258       }
   259    259   
   260    260       * suggest (sWord, nSuggLimit=10) {
   261    261           // generator: returns 1, 2 or 3 lists of suggestions
   262    262           yield this.oMainDic.suggest(sWord, nSuggLimit);
   263    263           if (this.bExtendedDic) {
   264    264               yield this.oExtendedDic.suggest(sWord, nSuggLimit);

Modified graphspell/ibdawg.py from [a255097656] to [71ae57c736].

   485    485                       sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
   486    486                       # Now , we go to the next node and retrieve all following arcs values, all of them are tags
   487    487                       iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
   488    488                       nRawArc2 = 0
   489    489                       while not (nRawArc2 & self._lastArcMask):
   490    490                           iEndArcAddr2 = iAddr2 + self.nBytesArc
   491    491                           nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
   492         -                        l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask])
          492  +                        l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
   493    493                           iAddr2 = iEndArcAddr2+self.nBytesNodeAddress
   494    494                   iAddr = iEndArcAddr+self.nBytesNodeAddress
   495    495               return l
   496    496           return []
   497    497   
   498    498       def _stem1 (self, sWord):
   499    499           "returns stems list of <sWord>"
................................................................................
   590    590                           while not (nRawArc & self._lastArcMask):
   591    591                               nRawArc = int.from_bytes(self.byDic[iAddr2:iAddr2+self.nBytesArc], byteorder='big')
   592    592                               iAddr2 += self.nBytesArc + self.nBytesNodeAddress
   593    593                       nRawArc2 = 0
   594    594                       while not (nRawArc2 & self._lastArcMask):
   595    595                           iEndArcAddr2 = iAddr2 + self.nBytesArc
   596    596                           nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
   597         -                        l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask])
          597  +                        l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
   598    598                           iAddr2 = iEndArcAddr2+self.nBytesNodeAddress  if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2
   599    599                   iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr
   600    600               return l
   601    601           return []
   602    602   
   603    603       def _stem2 (self, sWord):
   604    604           "returns stems list of <sWord>"
................................................................................
   702    702                           iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
   703    703                       else:
   704    704                           iAddr2 = iAddrNode + int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesOffset], byteorder='big')
   705    705                       nRawArc2 = 0
   706    706                       while not (nRawArc2 & self._lastArcMask):
   707    707                           iEndArcAddr2 = iAddr2 + self.nBytesArc
   708    708                           nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
   709         -                        l.append(sStem + " " + self.lArcVal[nRawArc2 & self._arcMask])
          709  +                        l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
   710    710                           iAddr2 = iEndArcAddr2+self.nBytesNodeAddress  if not (nRawArc2 & self._addrBitMask) else iEndArcAddr2+self.nBytesOffset
   711    711                   iAddr = iEndArcAddr+self.nBytesNodeAddress  if not (nRawArc & self._addrBitMask)  else iEndArcAddr+self.nBytesOffset
   712    712               return l
   713    713           return []
   714    714   
   715    715       def _stem3 (self, sWord):
   716    716           "returns stems list of <sWord>"

Modified graphspell/spellchecker.py from [70326fed78] to [e03172e122].

   209    209               lMorph.extend(self.oExtendedDic.getMorph(sWord))
   210    210           if self.bCommunityDic:
   211    211               lMorph.extend(self.oCommunityDic.getMorph(sWord))
   212    212           if self.bPersonalDic:
   213    213               lMorph.extend(self.oPersonalDic.getMorph(sWord))
   214    214           if self.bStorage:
   215    215               self._dMorphologies[sWord] = lMorph
   216         -            self._dLemmas[sWord] = set([ s[1:s.find(" ")]  for s in lMorph ])
          216  +            self._dLemmas[sWord] = set([ s[1:s.find("/")]  for s in lMorph ])
   217    217           return lMorph
   218    218   
   219    219       def getLemma (self, sWord):
   220    220           "retrieves lemmas"
   221    221           if self.bStorage:
   222    222               if sWord not in self._dLemmas:
   223    223                   self.getMorph(sWord)
   224    224               return self._dLemmas[sWord]
   225         -        return set([ s[1:s.find(" ")]  for s in self.getMorph(sWord) ])
          225  +        return set([ s[1:s.find("/")]  for s in self.getMorph(sWord) ])
   226    226   
   227    227       def suggest (self, sWord, nSuggLimit=10):
   228    228           "generator: returns 1, 2 or 3 lists of suggestions"
   229    229           if self.dDefaultSugg:
   230    230               if sWord in self.dDefaultSugg:
   231    231                   yield self.dDefaultSugg[sWord].split("|")
   232    232               elif sWord.istitle() and sWord.lower() in self.dDefaultSugg: