Grammalecte  Check-in [5538934848]

Overview
Comment:[graphspell][fx] dawg: remove useless parameters
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fx | graphspell | dict2
Files: files | file ages | folders
SHA3-256: 5538934848f82156ff524ccf9654299bbfe1ee5ea832b765b1d0529c28123028
User & Date: olr on 2020-11-05 16:25:58
Other Links: branch diff | manifest | tags
Context
2020-11-07
11:40
[graphspell][build][lo][fx] merge dict2: use binary list instead of binary string, drop support for binary file -> use JSON, code cleaning check-in: 40ebc5eada user: olr tags: build, fx, graphspell, lo, major_change, trunk
2020-11-05
16:25
[graphspell][fx] dawg: remove useless parameters Closed-Leaf check-in: 5538934848 user: olr tags: dict2, fx, graphspell
13:27
[graphspell][js] fix syntax error check-in: 6d54aadbb1 user: olr tags: dict2, graphspell
Changes

Modified gc_lang/fr/webext/panel/lex_editor.js from [a95656b530] to [ffa654539b].

757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
    },

    build: function () {
        let xProgressNode = document.getElementById("wait_progress");
        let lEntry = oLexiconTable.getEntries();
        if (lEntry.length > 0) {
            let oDAWG = new DAWG(lEntry, "S", "fr", "Français", this.sName, this.sDescription, xProgressNode);
            let oJSON = oDAWG.createBinaryJSON(1);
            oDictHandler.saveDictionary(this.sName, oJSON);
            this.oIBDAWG = new IBDAWG(oJSON);
            this.setDictData(this.oIBDAWG.nEntry, this.oIBDAWG.sDate);
        } else {
            oDictHandler.saveDictionary(this.sName, null);
            this.setDictData(0, "[néant]");
        }







|







757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
    },

    build: function () {
        let xProgressNode = document.getElementById("wait_progress");
        let lEntry = oLexiconTable.getEntries();
        if (lEntry.length > 0) {
            let oDAWG = new DAWG(lEntry, "S", "fr", "Français", this.sName, this.sDescription, xProgressNode);
            let oJSON = oDAWG.createBinaryJSON();
            oDictHandler.saveDictionary(this.sName, oJSON);
            this.oIBDAWG = new IBDAWG(oJSON);
            this.setDictData(this.oIBDAWG.nEntry, this.oIBDAWG.sDate);
        } else {
            oDictHandler.saveDictionary(this.sName, null);
            this.setDictData(0, "[néant]");
        }

Modified graphspell-js/dawg.js from [4cbdb7b217] to [82fa7fdc68].

340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
...
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
...
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
                    }
                }
            }
        }
    }

    // BINARY CONVERSION
    _calculateBinary (nCompressionMethod=1) {
        console.log("Write DAWG as an indexable binary dictionary");
        this.nBytesArc = Math.floor( (this.nArcVal.toString(2).length + 2) / 8 ) + 1;     // We add 2 bits. See DawgNode.convToBytes()
        this.nBytesOffset = 0;
        this._calcNumBytesNodeAddress();
        this._calcNodesAddress();
        this.sByDic = this.oRoot.convToBytes(this.nBytesArc, this.nBytesNodeAddress);
        for (let oNode of this.dMinimizedNodes.values()) {
................................................................................
        for (let n of aBytes) {
            nVal += n << nWeight;
            nWeight = nWeight - 8;
        }
        return nVal;
    }

    createBinaryJSON (nCompressionMethod=1) {
        this._calculateBinary(nCompressionMethod);
        this._binaryToList();
        let oJSON = {
            "sHeader": "/grammalecte-fsa/",
            "sLangCode": this.sLangCode,
            "sLangName": this.sLangName,
            "sDicName": this.sDicName,
            "sDescription": this.sDescription,
................................................................................
            "nTag": this.nTag,
            "cStemming": this.cStemming,
            "dChar": helpers.mapToObject(this.dChar),
            "nNode": this.nNode,
            "nArc": this.nArc,
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,
            "nCompressionMethod": nCompressionMethod,
            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,
            "nBytesOffset": this.nBytesOffset,
            //"sByDic": this.sByDic,    // binary word graph
            "lByDic": this.lByDic,
            "l2grams": Array.from(this.a2grams)
        };







|







 







|
|







 







<







340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
...
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
...
424
425
426
427
428
429
430

431
432
433
434
435
436
437
                    }
                }
            }
        }
    }

    // BINARY CONVERSION
    _calculateBinary () {
        console.log("Write DAWG as an indexable binary dictionary");
        this.nBytesArc = Math.floor( (this.nArcVal.toString(2).length + 2) / 8 ) + 1;     // We add 2 bits. See DawgNode.convToBytes()
        this.nBytesOffset = 0;
        this._calcNumBytesNodeAddress();
        this._calcNodesAddress();
        this.sByDic = this.oRoot.convToBytes(this.nBytesArc, this.nBytesNodeAddress);
        for (let oNode of this.dMinimizedNodes.values()) {
................................................................................
        for (let n of aBytes) {
            nVal += n << nWeight;
            nWeight = nWeight - 8;
        }
        return nVal;
    }

    createBinaryJSON () {
        this._calculateBinary();
        this._binaryToList();
        let oJSON = {
            "sHeader": "/grammalecte-fsa/",
            "sLangCode": this.sLangCode,
            "sLangName": this.sLangName,
            "sDicName": this.sDicName,
            "sDescription": this.sDescription,
................................................................................
            "nTag": this.nTag,
            "cStemming": this.cStemming,
            "dChar": helpers.mapToObject(this.dChar),
            "nNode": this.nNode,
            "nArc": this.nArc,
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,

            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,
            "nBytesOffset": this.nBytesOffset,
            //"sByDic": this.sByDic,    // binary word graph
            "lByDic": this.lByDic,
            "l2grams": Array.from(this.a2grams)
        };

Modified graphspell-js/dic_merger.js from [dea1fd0b02] to [10f02569ea].

34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
            }
        }
        if (xProgressBar) {
            xProgressBar.value = xProgressBar.max;
        }
        try {
            let oDAWG = new DAWG(lEntry, cStemming, sLangCode, sLangName, sDicName, sDescription, xProgressBar);
            let oDict = oDAWG.createBinaryJSON(1);
            return oDict;
        }
        catch (e) {
            console.log("Dictionaries merger: unable to generate merged dictionary");
            console.error(e);
            return null;
        }
    }
}







|









34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
            }
        }
        if (xProgressBar) {
            xProgressBar.value = xProgressBar.max;
        }
        try {
            let oDAWG = new DAWG(lEntry, cStemming, sLangCode, sLangName, sDicName, sDescription, xProgressBar);
            let oDict = oDAWG.createBinaryJSON();
            return oDict;
        }
        catch (e) {
            console.log("Dictionaries merger: unable to generate merged dictionary");
            console.error(e);
            return null;
        }
    }
}

Modified graphspell-js/ibdawg.js from [8c83fe3dde] to [73dd04c644].

143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
...
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
...
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
            console.error(e);
            console.log("path: " + sPath);
            console.log("dic:" + source.slice(0, 1000));
            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
            sName, nCompressionMethod, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
            nEntry, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset,
        */

        if (!(this.sHeader.startsWith("/grammalecte-fsa/") || this.sHeader.startsWith("/pyfsa/"))) {
            throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
................................................................................
        if (self && self.hasOwnProperty("lexgraph_"+this.sLangCode)) { // self is the Worker
            this.lexicographer = self["lexgraph_"+this.sLangCode];
        }
    }

    getInfo () {
        return  `  Language: ${this.sLangName}   Lang code: ${this.sLangCode}   Dictionary name: ${this.sDicName}\n` +
                `  Compression method: ${this.nCompressionMethod}   Date: ${this.sDate}   Stemming: ${this.cStemming}FX\n` +
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;
    }

    getJSON () {
        let oJSON = {
................................................................................
            "nTag": this.nTag,
            "cStemming": this.cStemming,
            "dChar": helpers.mapToObject(this.dChar),
            "nNode": this.nNode,
            "nArc": this.nArc,
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,
            "nCompressionMethod": this.nCompressionMethod,
            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,
            "nBytesOffset": this.nBytesOffset,
            "sByDic": this.sByDic,  // binary word graph
            "l2grams": this.l2grams
        };
        return oJSON;







|







 







|







 







<







143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
...
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
...
232
233
234
235
236
237
238

239
240
241
242
243
244
245
            console.error(e);
            console.log("path: " + sPath);
            console.log("dic:" + source.slice(0, 1000));
            throw Error("# Error. File not found or not loadable.\n" + e.message + "\n");
        }
        /*
            Properties:
            sName, sHeader, lArcVal, nArcVal, sByDic, sLang, nChar, nBytesArc, nBytesNodeAddress,
            nEntry, nNode, nArc, nAff, cStemming, nTag, dChar, nBytesOffset,
        */

        if (!(this.sHeader.startsWith("/grammalecte-fsa/") || this.sHeader.startsWith("/pyfsa/"))) {
            throw TypeError("# Error. Not a grammalecte-fsa binary dictionary. Header: " + this.sHeader);
        }
        // <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
................................................................................
        if (self && self.hasOwnProperty("lexgraph_"+this.sLangCode)) { // self is the Worker
            this.lexicographer = self["lexgraph_"+this.sLangCode];
        }
    }

    getInfo () {
        return  `  Language: ${this.sLangName}   Lang code: ${this.sLangCode}   Dictionary name: ${this.sDicName}\n` +
                `  Date: ${this.sDate}   Stemming: ${this.cStemming}FX\n` +
                `  Arcs values:  ${this.nArcVal} = ${this.nChar} characters,  ${this.nAff} affixes,  ${this.nTag} tags\n` +
                `  Dictionary: ${this.nEntry} entries,    ${this.nNode} nodes,   ${this.nArc} arcs\n` +
                `  Address size: ${this.nBytesNodeAddress} bytes,  Arc size: ${this.nBytesArc} bytes\n`;
    }

    getJSON () {
        let oJSON = {
................................................................................
            "nTag": this.nTag,
            "cStemming": this.cStemming,
            "dChar": helpers.mapToObject(this.dChar),
            "nNode": this.nNode,
            "nArc": this.nArc,
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,

            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,
            "nBytesOffset": this.nBytesOffset,
            "sByDic": this.sByDic,  // binary word graph
            "l2grams": this.l2grams
        };
        return oJSON;

Modified graphspell/dawg.py from [781b24100a] to [729715ac89].

354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
...
383
384
385
386
387
388
389







390
391
392
393
394
395
396
...
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
...
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
                sEntry = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nVal])
                for nMorphVal, _ in oNextNode.arcs.items():
                    if not zPattern or zPattern.search(self.lArcVal[nMorphVal]):
                        yield sEntry + "\t" + self.lArcVal[nMorphVal]


    # BINARY CONVERSION
    def _calculateBinary (self, nCompressionMethod=1):
        print(" > Write DAWG as an indexable binary dictionary")
        self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1   # We add 2 bits. See DawgNode.convToBytes()
        self.nBytesOffset = 0
        self._calcNumBytesNodeAddress()
        self._calcNodesAddress()
        self.byDic = b""
        self.byDic = self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress)
................................................................................
        nBytesNode = self.nBytesArc + self.nBytesNodeAddress
        iAddr = len(self.oRoot.arcs) * nBytesNode
        for oNode in self.lMinimizedNodes:
            oNode.addr = iAddr
            iAddr += max(len(oNode.arcs), 1) * nBytesNode

    def _binaryToList (self):







        self.lByDic = []
        nAcc = 0
        byBuffer = b""
        nDivisor = (self.nBytesArc + self.nBytesNodeAddress) / 2
        for i in range(0, len(self.byDic)):
            byBuffer += self.byDic[i:i+1]
            if nAcc == (self.nBytesArc - 1):
................................................................................
                byBuffer = b""
            elif nAcc == (self.nBytesArc + self.nBytesNodeAddress - 1):
                self.lByDic.append(round(int.from_bytes(byBuffer, byteorder="big") / nDivisor))
                byBuffer = b""
                nAcc = -1
            nAcc = nAcc + 1

    def getBinaryAsJSON (self, nCompressionMethod=1):
        "return a JSON string containing all necessary data of the dictionary (compressed as a binary string)"
        self._calculateBinary(nCompressionMethod)
        self._binaryToList()
        return {
            "sHeader": "/grammalecte-fsa/",
            "sLangCode": self.sLangCode,
            "sLangName": self.sLangName,
            "sDicName": self.sDicName,
            "sDescription": self.sDescription,
................................................................................
            "nTag": self.nTag,
            "cStemming": self.cStemming,
            "dChar": self.dChar,
            "nNode": self.nNode,
            "nArc": self.nArc,
            "nArcVal": self.nArcVal,
            "lArcVal": self.lArcVal,
            "nCompressionMethod": nCompressionMethod,
            "nBytesArc": self.nBytesArc,
            "nBytesNodeAddress": self.nBytesNodeAddress,
            "nBytesOffset": self.nBytesOffset,
            # Mozilla’s JS parser don’t like file bigger than 4 Mb!
            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
            # https://github.com/mozilla/addons-linter/issues/1361
            #"sByDic": self.byDic.hex(),
            "lByDic": self.lByDic,
            "l2grams": list(self.a2grams)
        }

    def writeAsJSObject (self, spfDst, nCompressionMethod=1, bInJSModule=False):
        "write a file (JSON or JS module) with all the necessary data"
        if not spfDst.endswith(".json"):
            spfDst += "."+str(nCompressionMethod)+".json"
        with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst:
            if bInJSModule:
                hDst.write('// JavaScript\n// Generated data (do not edit)\n\n"use strict";\n\nconst dictionary = ')
            hDst.write( json.dumps(self.getBinaryAsJSON(nCompressionMethod), ensure_ascii=False) )
            if bInJSModule:
                hDst.write(";\n\nexports.dictionary = dictionary;\n")

    def _getDate (self):
        return time.strftime("%Y-%m-%d %H:%M:%S")

    def _writeNodes (self, sPathFile, nCompressionMethod=1):
        "for debugging only"
        print(" > Write nodes")
        with open(sPathFile+".nodes."+str(nCompressionMethod)+".txt", 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(self.oRoot.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n")
            #hDst.write( ''.join( [ "%02X " %  z  for z in self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress) ] ).strip() )
            for oNode in self.lMinimizedNodes:
                hDst.write(oNode.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n")










|







 







>
>
>
>
>
>
>







 







|

|







 







<











|


|

<
<
|
<
<




|


|







354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
...
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
...
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
...
427
428
429
430
431
432
433

434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449


450


451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
                sEntry = sWord + "\t" + self.funcStemming(sWord, self.lArcVal[nVal])
                for nMorphVal, _ in oNextNode.arcs.items():
                    if not zPattern or zPattern.search(self.lArcVal[nMorphVal]):
                        yield sEntry + "\t" + self.lArcVal[nMorphVal]


    # BINARY CONVERSION
    def _calculateBinary (self):
        print(" > Write DAWG as an indexable binary dictionary")
        self.nBytesArc = ( (self.nArcVal.bit_length() + 2) // 8 ) + 1   # We add 2 bits. See DawgNode.convToBytes()
        self.nBytesOffset = 0
        self._calcNumBytesNodeAddress()
        self._calcNodesAddress()
        self.byDic = b""
        self.byDic = self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress)
................................................................................
        nBytesNode = self.nBytesArc + self.nBytesNodeAddress
        iAddr = len(self.oRoot.arcs) * nBytesNode
        for oNode in self.lMinimizedNodes:
            oNode.addr = iAddr
            iAddr += max(len(oNode.arcs), 1) * nBytesNode

    def _binaryToList (self):
        """
        Convert binary string to binary list
        BEFORE: Arc                 Address                                 Arc                 Address
                ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ||||||||| ...

        AFTER:  list of integers: [ arc, address, arc, address, arc, address, ... arc, address ]
        """
        self.lByDic = []
        nAcc = 0
        byBuffer = b""
        nDivisor = (self.nBytesArc + self.nBytesNodeAddress) / 2
        for i in range(0, len(self.byDic)):
            byBuffer += self.byDic[i:i+1]
            if nAcc == (self.nBytesArc - 1):
................................................................................
                byBuffer = b""
            elif nAcc == (self.nBytesArc + self.nBytesNodeAddress - 1):
                self.lByDic.append(round(int.from_bytes(byBuffer, byteorder="big") / nDivisor))
                byBuffer = b""
                nAcc = -1
            nAcc = nAcc + 1

    def getBinaryAsJSON (self):
        "return a JSON string containing all necessary data of the dictionary (compressed as a binary string)"
        self._calculateBinary()
        self._binaryToList()
        return {
            "sHeader": "/grammalecte-fsa/",
            "sLangCode": self.sLangCode,
            "sLangName": self.sLangName,
            "sDicName": self.sDicName,
            "sDescription": self.sDescription,
................................................................................
            "nTag": self.nTag,
            "cStemming": self.cStemming,
            "dChar": self.dChar,
            "nNode": self.nNode,
            "nArc": self.nArc,
            "nArcVal": self.nArcVal,
            "lArcVal": self.lArcVal,

            "nBytesArc": self.nBytesArc,
            "nBytesNodeAddress": self.nBytesNodeAddress,
            "nBytesOffset": self.nBytesOffset,
            # Mozilla’s JS parser don’t like file bigger than 4 Mb!
            # So, if necessary, we use an hexadecimal string, that we will convert later in Firefox’s extension.
            # https://github.com/mozilla/addons-linter/issues/1361
            #"sByDic": self.byDic.hex(),
            "lByDic": self.lByDic,
            "l2grams": list(self.a2grams)
        }

    def writeAsJSObject (self, spfDst):
        "write a file (JSON or JS module) with all the necessary data"
        if not spfDst.endswith(".json"):
            spfDst += ".json"
        with open(spfDst, "w", encoding="utf-8", newline="\n") as hDst:


            hDst.write( json.dumps(self.getBinaryAsJSON(), ensure_ascii=False) )



    def _getDate (self):
        return time.strftime("%Y-%m-%d %H:%M:%S")

    def _writeNodes (self, sPathFile):
        "for debugging only"
        print(" > Write nodes")
        with open(sPathFile+".nodes.txt", 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.write(self.oRoot.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n")
            #hDst.write( ''.join( [ "%02X " %  z  for z in self.oRoot.convToBytes(self.nBytesArc, self.nBytesNodeAddress) ] ).strip() )
            for oNode in self.lMinimizedNodes:
                hDst.write(oNode.getTxtRepr(self.nBytesArc, self.lArcVal)+"\n")



Modified graphspell/ibdawg.py from [953707753a] to [15700e29f3].

170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
            self.lexicographer = importlib.import_module(".lexgraph_"+self.sLangCode, "grammalecte.graphspell")
        except ImportError:
            print("# No module <graphspell.lexgraph_"+self.sLangCode+".py>")

    def getInfo (self):
        "return string about the IBDAWG"
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                "  Address size: {0.nBytesNodeAddress:>1} bytes,  Arc size: {0.nBytesArc:>1} bytes\n".format(self)

    def isValidToken (self, sToken):
        "checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)"
        sToken = st.spellingNormalization(sToken)







|







170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
            self.lexicographer = importlib.import_module(".lexgraph_"+self.sLangCode, "grammalecte.graphspell")
        except ImportError:
            print("# No module <graphspell.lexgraph_"+self.sLangCode+".py>")

    def getInfo (self):
        "return string about the IBDAWG"
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
                "  Address size: {0.nBytesNodeAddress:>1} bytes,  Arc size: {0.nBytesArc:>1} bytes\n".format(self)

    def isValidToken (self, sToken):
        "checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)"
        sToken = st.spellingNormalization(sToken)