Grammalecte  Changes On Branch ae767aaff504b1b6

Changes In Branch bdic_opt Through [ae767aaff5] Excluding Merge-Ins

This is equivalent to a diff from 86f302f4ef to ae767aaff5

2020-09-11
19:20
merge trunk check-in: 43afb8b856 user: olr tags: bdic_opt
19:18
[fr] tests: spellchecker.suggest() check-in: 4a19028115 user: olr tags: fr, trunk
17:22
[graphspell][py] ibdawg optimization: precalculate bytes in binary dictionary check-in: ae767aaff5 user: olr tags: bdic_opt, graphspell
15:53
[graphspell][js] ibdawg optimization: precalculate bytes in binary dictionary check-in: 443f28094b user: olr tags: bdic_opt, graphspell
14:21
[fr] faux positif check-in: 86f302f4ef user: olr tags: fr, trunk
10:12
[fr] ajustements: regex -> tokens (virgules) check-in: 5bd70d2c52 user: olr tags: fr, trunk

Modified gc_lang/fr/perf_memo.text from [6a0d81df00] to [ad156793c1].

    26     26   0.6.2       2018.02.19 19:06    5.51302     1.29359     0.874157    0.260415    0.271596    0.290641    0.684754    0.376905    0.0815201   0.00919633  (spelling normalization)
    27     27   1.0         2018.11.23 10:59    2.88577     0.702486    0.485648    0.139897    0.14079     0.148125    0.348751    0.201061    0.0360297   0.0043535   (x2, with new GC engine)
    28     28   1.1         2019.05.16 09:42    1.50743     0.360923    0.261113    0.0749272   0.0763827   0.0771537   0.180504    0.102942    0.0182762   0.0021925   (×2, but new processor: AMD Ryzen 7 2700X)
    29     29   1.2.1       2019.08.06 20:57    1.42886     0.358425    0.247356    0.0704405   0.0754886   0.0765604   0.177197    0.0988517   0.0188103   0.0020243
    30     30   1.6.0       2020.01.03 20:22    1.38847     0.346214    0.240242    0.0709539   0.0737499   0.0748733   0.176477    0.0969171   0.0187857   0.0025143   (nouveau dictionnaire avec lemmes masculins)
    31     31   1.9.0       2020.04.20 19:57    1.51183     0.369546    0.25681     0.0734314   0.0764396   0.0785668   0.183922    0.103674    0.0185812   0.002099    (NFC normalization)
    32     32   1.9.2       2020.05.12 08:43    1.62465     0.398831    0.273012    0.0810811   0.080937    0.0845885   0.204133    0.114146    0.0212864   0.0029547
    33         -1.12.2      2020.09.09 13:34    1.50568     0.374504    0.233108    0.0798712   0.0804466   0.0769674   0.171519    0.0945132   0.0165344   0.0019474   
    34         -1.12.2      2020.09.09 13:35    1.41094     0.359093    0.236443    0.06968     0.0734418   0.0738087   0.169371    0.0946279   0.0167106   0.0019773   
           33  +1.12.2      2020.09.09 13:34    1.50568     0.374504    0.233108    0.0798712   0.0804466   0.0769674   0.171519    0.0945132   0.0165344   0.0019474
           34  +1.12.2      2020.09.09 13:35    1.41094     0.359093    0.236443    0.06968     0.0734418   0.0738087   0.169371    0.0946279   0.0167106   0.0019773
           35  +1.12.2      2020.09.11 19:16    1.35297     0.330545    0.221731    0.0666998   0.0692539   0.0701707   0.160564    0.0891676   0.015807    0.0045998

Modified graphspell-js/ibdawg.js from [69d7490b82] to [1dc2c625a7].

   126    126   
   127    127           /*
   128    128               Bug workaround.
   129    129               Mozilla’s JS parser sucks. Can’t read file bigger than 4 Mb!
   130    130               So we convert huge hexadecimal string to list of numbers…
   131    131               https://github.com/mozilla/addons-linter/issues/1361
   132    132           */
          133  +        /*
          134  +            Performance trick:
          135  +            Instead of converting bytes to integers each time we parse the binary dictionary,
          136  +            we do it once, then parse the array
          137  +        */
          138  +        let nAcc = 0;
          139  +        let lBytesBuffer = [];
   133    140           let lTemp = [];
          141  +        let nDivisor = (this.nBytesArc + this.nBytesNodeAddress) / 2;
   134    142           for (let i = 0;  i < this.sByDic.length;  i+=2) {
   135         -            lTemp.push(parseInt(this.sByDic.slice(i, i+2), 16));
          143  +            lBytesBuffer.push(parseInt(this.sByDic.slice(i, i+2), 16));
          144  +            if (nAcc == (this.nBytesArc - 1)) {
          145  +                lTemp.push(this._convBytesToInteger(lBytesBuffer));
          146  +                lBytesBuffer = [];
          147  +            }
          148  +            else if (nAcc == (this.nBytesArc + this.nBytesNodeAddress - 1)) {
          149  +                lTemp.push(Math.round(this._convBytesToInteger(lBytesBuffer) / nDivisor));  // Math.round should be useless, BUT with JS who knows what can happen…
          150  +                lBytesBuffer = [];
          151  +                nAcc = -1;
          152  +            }
          153  +            nAcc = nAcc + 1;
   136    154           }
   137    155           this.byDic = lTemp;
   138         -        //this.byDic = new Uint8Array(lTemp);  // not quicker, even slower
   139    156           /* end of bug workaround */
   140    157   
   141    158           if (!(this.sHeader.startsWith("/grammalecte-fsa/") || this.sHeader.startsWith("/pyfsa/"))) {
   142    159               throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader);
   143    160           }
   144    161           if (!(this.nCompressionMethod == 1 || this.nCompressionMethod == 2 || this.nCompressionMethod == 3)) {
   145    162               throw RangeError("# Error. Unknown dictionary compression method: " + this.nCompressionMethod);
................................................................................
   194    211   
   195    212           // lexicographer module ?
   196    213           this.lexicographer = null;
   197    214           // JS still sucks: we’ll try importation when importation will be available in Workers. Still waiting...
   198    215           if (self && self.hasOwnProperty("lexgraph_"+this.sLangCode)) { // self is the Worker
   199    216               this.lexicographer = self["lexgraph_"+this.sLangCode];
   200    217           }
   201         -
   202    218       }
   203    219   
   204    220       getInfo () {
   205    221           return  `  Language: ${this.sLangName}   Lang code: ${this.sLangCode}   Dictionary name: ${this.sDicName}\n` +
   206    222                   `  Compression method: ${this.nCompressionMethod}   Date: ${this.sDate}   Stemming: ${this.cStemming}FX\n` +
   207    223                   `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
   208    224                   `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
................................................................................
   304    320                   return false;
   305    321               }
   306    322               iAddr = this._lookupArcNode(this.dChar.get(c), iAddr);
   307    323               if (iAddr === null) {
   308    324                   return false;
   309    325               }
   310    326           }
   311         -        return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask);
          327  +        return Boolean(this.byDic[iAddr] & this._finalNodeMask);
   312    328       }
   313    329   
   314    330       getMorph (sWord) {
   315    331           // retrieves morphologies list, different casing allowed
   316    332           if (!sWord) {
   317    333               return [];
   318    334           }
................................................................................
   376    392               }
   377    393           }
   378    394       }
   379    395   
   380    396       _suggest (oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=false) {
   381    397           // returns a set of suggestions
   382    398           // recursive function
   383         -        if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
          399  +        if (this.byDic[iAddr] & this._finalNodeMask) {
   384    400               if (sRemain == "") {
   385    401                   oSuggResult.addSugg(sNewWord);
   386    402                   for (let sTail of this._getTails(iAddr)) {
   387    403                       oSuggResult.addSugg(sNewWord+sTail);
   388    404                   }
   389    405                   return;
   390    406               }
................................................................................
   486    502       }
   487    503   
   488    504       _getTails (iAddr, sTail="", n=2) {
   489    505           // return a list of suffixes ending at a distance of <n> from <iAddr>
   490    506           let aTails = new Set();
   491    507           for (let [nVal, jAddr] of this._getArcs(iAddr)) {
   492    508               if (nVal <= this.nChar) {
   493         -                if (this._convBytesToInteger(this.byDic.slice(jAddr, jAddr+this.nBytesArc)) & this._finalNodeMask) {
          509  +                if (this.byDic[jAddr] & this._finalNodeMask) {
   494    510                       aTails.add(sTail + this.dCharVal.get(nVal));
   495    511                   }
   496    512                   if (n && aTails.size == 0) {
   497    513                       aTails.gl_update(this._getTails(jAddr, sTail+this.dCharVal.get(nVal), n-1));
   498    514                   }
   499    515               }
   500    516           }
................................................................................
   564    580                   return [];
   565    581               }
   566    582               iAddr = this._lookupArcNode(this.dChar.get(c), iAddr);
   567    583               if (iAddr === null) {
   568    584                   return [];
   569    585               }
   570    586           }
   571         -        if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
          587  +        if (this.byDic[iAddr] & this._finalNodeMask) {
   572    588               let l = [];
   573    589               let nRawArc = 0;
   574    590               while (!(nRawArc & this._lastArcMask)) {
   575         -                let iEndArcAddr = iAddr + this.nBytesArc;
   576         -                nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
          591  +                let iEndArcAddr = iAddr + 1;
          592  +                nRawArc = this.byDic[iAddr];
   577    593                   let nArc = nRawArc & this._arcMask;
   578    594                   if (nArc > this.nChar) {
   579    595                       // This value is not a char, this is a stemming code
   580    596                       let sStem = ">" + this.funcStemming(sWord, this.lArcVal[nArc]);
   581    597                       // Now , we go to the next node and retrieve all following arcs values, all of them are tags
   582         -                    let iAddr2 = this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
          598  +                    let iAddr2 = this.byDic[iEndArcAddr];
   583    599                       let nRawArc2 = 0;
   584    600                       while (!(nRawArc2 & this._lastArcMask)) {
   585         -                        let iEndArcAddr2 = iAddr2 + this.nBytesArc;
   586         -                        nRawArc2 = this._convBytesToInteger(this.byDic.slice(iAddr2, iEndArcAddr2));
          601  +                        let iEndArcAddr2 = iAddr2 + 1;
          602  +                        nRawArc2 = this.byDic[iAddr2];
   587    603                           l.push(sStem + "/" + this.lArcVal[nRawArc2 & this._arcMask]);
   588         -                        iAddr2 = iEndArcAddr2+this.nBytesNodeAddress;
          604  +                        iAddr2 = iEndArcAddr2 + 1;
   589    605                       }
   590    606                   }
   591         -                iAddr = iEndArcAddr + this.nBytesNodeAddress;
          607  +                iAddr = iEndArcAddr + 1;
   592    608               }
   593    609               return l;
   594    610           }
   595    611           return [];
   596    612       }
   597    613   
   598    614       _stem1 (sWord) {
................................................................................
   603    619                   return [];
   604    620               }
   605    621               iAddr = this._lookupArcNode(this.dChar.get(c), iAddr);
   606    622               if (iAddr === null) {
   607    623                   return [];
   608    624               }
   609    625           }
   610         -        if (this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask) {
          626  +        if (this.byDic[iAddr] & this._finalNodeMask) {
   611    627               let l = [];
   612    628               let nRawArc = 0;
   613    629               while (!(nRawArc & this._lastArcMask)) {
   614         -                let iEndArcAddr = iAddr + this.nBytesArc;
   615         -                nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
          630  +                let iEndArcAddr = iAddr + 1;
          631  +                nRawArc = this.byDic[iAddr];
   616    632                   let nArc = nRawArc & this._arcMask;
   617    633                   if (nArc > this.nChar) {
   618    634                       // This value is not a char, this is a stemming code
   619    635                       l.push(this.funcStemming(sWord, this.lArcVal[nArc]));
   620    636                   }
   621         -                iAddr = iEndArcAddr + this.nBytesNodeAddress;
          637  +                iAddr = iEndArcAddr + 1;
   622    638               }
   623    639               return l;
   624    640           }
   625    641           return [];
   626    642       }
   627    643   
   628    644       _lookupArcNode1 (nVal, iAddr) {
   629    645           // looks if nVal is an arc at the node at iAddr, if yes, returns address of next node else None
   630    646           while (true) {
   631         -            let iEndArcAddr = iAddr+this.nBytesArc;
   632         -            let nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
          647  +            let iEndArcAddr = iAddr+1;
          648  +            let nRawArc = this.byDic[iAddr];
   633    649               if (nVal == (nRawArc & this._arcMask)) {
   634    650                   // the value we are looking for
   635    651                   // we return the address of the next node
   636         -                return this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress));
          652  +                return this.byDic[iEndArcAddr];
   637    653               }
   638    654               else {
   639    655                   // value not found
   640    656                   if (nRawArc & this._lastArcMask) {
   641    657                       return null;
   642    658                   }
   643         -                iAddr = iEndArcAddr + this.nBytesNodeAddress;
          659  +                iAddr = iEndArcAddr + 1;
   644    660               }
   645    661           }
   646    662       }
   647    663   
   648    664       * _getArcs1 (iAddr) {
   649    665           // generator: return all arcs at <iAddr> as tuples of (nVal, iAddr)
   650    666           while (true) {
   651         -            let iEndArcAddr = iAddr+this.nBytesArc;
   652         -            let nRawArc = this._convBytesToInteger(this.byDic.slice(iAddr, iEndArcAddr));
   653         -            yield [nRawArc & this._arcMask, this._convBytesToInteger(this.byDic.slice(iEndArcAddr, iEndArcAddr+this.nBytesNodeAddress))];
          667  +            let iEndArcAddr = iAddr+1;
          668  +            let nRawArc = this.byDic[iAddr];
          669  +            yield [nRawArc & this._arcMask, this.byDic[iEndArcAddr]];
   654    670               if (nRawArc & this._lastArcMask) {
   655    671                   break;
   656    672               }
   657         -            iAddr = iEndArcAddr+this.nBytesNodeAddress;
          673  +            iAddr = iEndArcAddr+1;
   658    674           }
   659    675       }
   660    676   
   661    677       // VERSION 2
   662    678       _morph2 (sWord) {
   663    679           // to do
   664    680       }

Modified graphspell/ibdawg.py from [d16ed0d683] to [0fe5cbd03f].

   116    116               else:
   117    117                   raise OSError("# Error. Unknown file type: "+source)
   118    118           else:
   119    119               self._initJSON(source)
   120    120   
   121    121           self.sFileName = source  if isinstance(source, str)  else "[None]"
   122    122   
          123  +        # Performance trick:
          124  +        #     Instead of converting bytes to integers each time we parse the binary dictionary,
          125  +        #     we do it once, then parse the array
          126  +        nAcc = 0
          127  +        byBuffer = b""
          128  +        lTemp = []
          129  +        nDivisor = (self.nBytesArc + self.nBytesNodeAddress) / 2
          130  +        for i in range(0, len(self.byDic)):
          131  +            byBuffer += self.byDic[i:i+1]
          132  +            if nAcc == (self.nBytesArc - 1):
          133  +                lTemp.append(int.from_bytes(byBuffer, byteorder="big"))
          134  +                byBuffer = b""
          135  +            elif nAcc == (self.nBytesArc + self.nBytesNodeAddress - 1):
          136  +                lTemp.append(round(int.from_bytes(byBuffer, byteorder="big") / nDivisor))
          137  +                byBuffer = b""
          138  +                nAcc = -1
          139  +            nAcc = nAcc + 1
          140  +        self.byDic = lTemp;
          141  +
          142  +        # masks
   123    143           self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
   124    144           self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
   125    145           self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
   126    146           self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2
   127    147   
   128    148           # function to decode the affix/suffix code
   129    149           if self.cStemming == "S":
................................................................................
   296    316           iAddr = 0
   297    317           for c in sWord:
   298    318               if c not in self.dChar:
   299    319                   return False
   300    320               iAddr = self._lookupArcNode(self.dChar[c], iAddr)
   301    321               if iAddr is None:
   302    322                   return False
   303         -        return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)
          323  +        return bool(self.byDic[iAddr] & self._finalNodeMask)
   304    324   
   305    325       def getMorph (self, sWord):
   306    326           "retrieves morphologies list, different casing allowed"
   307    327           if not sWord:
   308    328               return []
   309    329           sWord = st.spellingNormalization(sWord)
   310    330           l = self.morph(sWord)
................................................................................
   352    372                   sWord1, sWord2 = sWord.split(cSplitter, 1)
   353    373                   if self.isValid(sWord1) and self.isValid(sWord2):
   354    374                       oSuggResult.addSugg(sWord1+" "+sWord2)
   355    375   
   356    376       def _suggest (self, oSuggResult, sRemain, nMaxSwitch=0, nMaxDel=0, nMaxHardRepl=0, nMaxJump=0, nDist=0, nDeep=0, iAddr=0, sNewWord="", bAvoidLoop=False):
   357    377           # recursive function
   358    378           #logging.info((nDeep * "  ") + sNewWord + ":" + sRemain)
   359         -        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
          379  +        if self.byDic[iAddr] & self._finalNodeMask:
   360    380               if not sRemain:
   361    381                   oSuggResult.addSugg(sNewWord, nDeep)
   362    382                   for sTail in self._getTails(iAddr):
   363    383                       oSuggResult.addSugg(sNewWord+sTail, nDeep)
   364    384                   return
   365    385               if (len(sNewWord) + len(sRemain) == len(oSuggResult.sWord)) and oSuggResult.sWord.lower().startswith(sNewWord.lower()) and self.isValid(sRemain):
   366    386                   if self.sLangCode == "fr" and sNewWord.lower() in ("l", "d", "n", "m", "t", "s", "c", "j", "qu", "lorsqu", "puisqu", "quoiqu", "jusqu", "quelqu") and sRemain[0:1] in cp.aVowel:
................................................................................
   419    439                   yield (self.dCharVal[nVal], jAddr)
   420    440   
   421    441       def _getTails (self, iAddr, sTail="", n=2):
   422    442           "return a list of suffixes ending at a distance of <n> from <iAddr>"
   423    443           aTails = set()
   424    444           for nVal, jAddr in self._getArcs(iAddr):
   425    445               if nVal <= self.nChar:
   426         -                if int.from_bytes(self.byDic[jAddr:jAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
          446  +                if self.byDic[jAddr] & self._finalNodeMask:
   427    447                       aTails.add(sTail + self.dCharVal[nVal])
   428    448                   if n and not aTails:
   429    449                       aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
   430    450           return aTails
   431    451   
   432    452       def drawPath (self, sWord, iAddr=0):
   433    453           "show the path taken by <sWord> in the graph"
................................................................................
   495    515           iAddr = 0
   496    516           for c in sWord:
   497    517               if c not in self.dChar:
   498    518                   return []
   499    519               iAddr = self._lookupArcNode(self.dChar[c], iAddr)
   500    520               if iAddr is None:
   501    521                   return []
   502         -        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
          522  +        if self.byDic[iAddr] & self._finalNodeMask:
   503    523               l = []
   504    524               nRawArc = 0
   505    525               while not nRawArc & self._lastArcMask:
   506         -                iEndArcAddr = iAddr + self.nBytesArc
   507         -                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
          526  +                iEndArcAddr = iAddr + 1
          527  +                nRawArc = self.byDic[iAddr]
   508    528                   nArc = nRawArc & self._arcMask
   509    529                   if nArc > self.nChar:
   510    530                       # This value is not a char, this is a stemming code
   511    531                       sStem = ">" + self.funcStemming(sWord, self.lArcVal[nArc])
   512    532                       # Now , we go to the next node and retrieve all following arcs values, all of them are tags
   513         -                    iAddr2 = int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
          533  +                    iAddr2 = self.byDic[iEndArcAddr]
   514    534                       nRawArc2 = 0
   515    535                       while not nRawArc2 & self._lastArcMask:
   516         -                        iEndArcAddr2 = iAddr2 + self.nBytesArc
   517         -                        nRawArc2 = int.from_bytes(self.byDic[iAddr2:iEndArcAddr2], byteorder='big')
          536  +                        iEndArcAddr2 = iAddr2 + 1
          537  +                        nRawArc2 = self.byDic[iAddr2]
   518    538                           l.append(sStem + "/" + self.lArcVal[nRawArc2 & self._arcMask])
   519         -                        iAddr2 = iEndArcAddr2+self.nBytesNodeAddress
   520         -                iAddr = iEndArcAddr+self.nBytesNodeAddress
          539  +                        iAddr2 = iEndArcAddr2 + 1
          540  +                iAddr = iEndArcAddr + 1
   521    541               return l
   522    542           return []
   523    543   
   524    544       def _stem1 (self, sWord):
   525    545           "returns stems list of <sWord>"
   526    546           iAddr = 0
   527    547           for c in sWord:
   528    548               if c not in self.dChar:
   529    549                   return []
   530    550               iAddr = self._lookupArcNode(self.dChar[c], iAddr)
   531    551               if iAddr is None:
   532    552                   return []
   533         -        if int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask:
          553  +        if self.byDic[iAddr] & self._finalNodeMask:
   534    554               l = []
   535    555               nRawArc = 0
   536    556               while not nRawArc & self._lastArcMask:
   537         -                iEndArcAddr = iAddr + self.nBytesArc
   538         -                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
          557  +                iEndArcAddr = iAddr + 1
          558  +                nRawArc = self.byDic[iAddr]
   539    559                   nArc = nRawArc & self._arcMask
   540    560                   if nArc > self.nChar:
   541    561                       # This value is not a char, this is a stemming code
   542    562                       l.append(self.funcStemming(sWord, self.lArcVal[nArc]))
   543         -                iAddr = iEndArcAddr+self.nBytesNodeAddress
          563  +                iAddr = iEndArcAddr + 1
   544    564               return l
   545    565           return []
   546    566   
   547    567       def _lookupArcNode1 (self, nVal, iAddr):
   548    568           "looks if <nVal> is an arc at the node at <iAddr>, if yes, returns address of next node else None"
   549    569           while True:
   550         -            iEndArcAddr = iAddr+self.nBytesArc
   551         -            nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
          570  +            iEndArcAddr = iAddr + 1
          571  +            nRawArc = self.byDic[iAddr]
   552    572               if nVal == (nRawArc & self._arcMask):
   553    573                   # the value we are looking for
   554    574                   # we return the address of the next node
   555         -                return int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
          575  +                return self.byDic[iEndArcAddr]
   556    576               # value not found
   557    577               if nRawArc & self._lastArcMask:
   558    578                   return None
   559         -            iAddr = iEndArcAddr+self.nBytesNodeAddress
          579  +            iAddr = iEndArcAddr + 1
   560    580   
   561    581       def _getArcs1 (self, iAddr):
   562    582           "generator: return all arcs at <iAddr> as tuples of (nVal, iAddr)"
   563    583           while True:
   564         -            iEndArcAddr = iAddr+self.nBytesArc
   565         -            nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
   566         -            yield nRawArc & self._arcMask, int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], byteorder='big')
          584  +            iEndArcAddr = iAddr + 1
          585  +            nRawArc = self.byDic[iAddr]
          586  +            yield nRawArc & self._arcMask, self.byDic[iEndArcAddr]
   567    587               if nRawArc & self._lastArcMask:
   568    588                   break
   569         -            iAddr = iEndArcAddr+self.nBytesNodeAddress
          589  +            iAddr = iEndArcAddr + 1
   570    590   
   571    591       def _writeNodes1 (self, spfDest):
   572    592           "for debugging only"
   573    593           print(" > Write binary nodes")
   574    594           with open(spfDest, 'w', 'utf-8', newline="\n") as hDst:
   575    595               iAddr = 0
   576    596               hDst.write("i{:_>10} -- #{:_>10}\n".format("0", iAddr))
   577    597               while iAddr < len(self.byDic):
   578         -                iEndArcAddr = iAddr+self.nBytesArc
   579         -                nRawArc = int.from_bytes(self.byDic[iAddr:iEndArcAddr], byteorder='big')
          598  +                iEndArcAddr = iAddr + 1
          599  +                nRawArc = self.byDic[iAddr]
   580    600                   nArc = nRawArc & self._arcMask
   581         -                hDst.write("  {:<20}  {:0>16}  i{:>10}   #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", \
   582         -                                                                            int.from_bytes(self.byDic[iEndArcAddr:iEndArcAddr+self.nBytesNodeAddress], \
   583         -                                                                                           byteorder='big')))
   584         -                iAddr = iEndArcAddr+self.nBytesNodeAddress
          601  +                hDst.write("  {:<20}  {:0>16}  i{:>10}   #{:_>10}\n".format(self.lArcVal[nArc], bin(nRawArc)[2:], "?", self.byDic[iEndArcAddr]))
          602  +                iAddr = iEndArcAddr + 1
   585    603                   if (nRawArc & self._lastArcMask) and iAddr < len(self.byDic):
   586    604                       hDst.write("\ni{:_>10} -- #{:_>10}\n".format("?", iAddr))
   587    605               hDst.close()
   588    606   
   589    607       # VERSION 2
   590    608       def _morph2 (self, sWord):
   591    609           "returns morphologies of <sWord>"