Grammalecte  Diff

Differences From Artifact [ba2376ee6f]:

To Artifact [a299d0be58]:


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
...
100
101
102
103
104
105
106

107
108
109
110
111
112
113
114
...
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
        This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
        We store suffix/affix codes and tags within the graph after the “real” word.
        A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
        Each arc is an index into this.lArcVal, which stores characters, suffix/affix codes for stemming, and tags.
        Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
    */

    constructor (lEntrySrc, sLangName, cStemming, xProgressBarNode=null) {
        console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
        let funcStemmingGen = null;
        switch (cStemming.toUpperCase()) {
            case "A":
                funcStemmingGen = str_transform.defineAffixCode; break;
            case "S":
                funcStemmingGen = str_transform.defineSuffixCode; break;
................................................................................
        let lKeyVal = [];
        for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
        for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
        for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
        let dValOccur = new Map(lKeyVal);
        lKeyVal.length = 0; // clear the array


        this.sLang = sLangName;
        this.nEntry = lWord.length;
        this.aPreviousEntry = [];
        oNodeCounter.reset();
        this.oRoot = new DawgNode();
        this.lUncheckedNodes = [];          // list of nodes that have not been checked for duplication.
        this.dMinimizedNodes = new Map();   // list of unique nodes that have been checked for duplication.
        this.nNode = 0;
................................................................................
            sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            for (let oNode of this.dMinimizedNodes.values()) {
                sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            }
        }
        let oJSON = {
            "sName": this.sName,
            "nVersion": this.nMethod,
            "sHeader": this.sHeader,
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,
            "byDic": sByDic,
            "sLang": this.sLang,
            "nChar": this.nChar,
            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,







|







 







>
|







 







|
|







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
...
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
...
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
        This code is inspired by Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
        We store suffix/affix codes and tags within the graph after the “real” word.
        A word is a list of numbers [ c1, c2, c3 . . . cN, iAffix, iTags]
        Each arc is an index into this.lArcVal, which stores characters, suffix/affix codes for stemming, and tags.
        Important: As usual, the last node (after ‘iTags’) is tagged final, AND the node after ‘cN’ is ALSO tagged final.
    */

    constructor (lEntrySrc, sLang, cStemming, xProgressBarNode=null) {
        console.log("===== Direct Acyclic Word Graph - Minimal Acyclic Finite State Automaton =====");
        let funcStemmingGen = null;
        switch (cStemming.toUpperCase()) {
            case "A":
                funcStemmingGen = str_transform.defineAffixCode; break;
            case "S":
                funcStemmingGen = str_transform.defineSuffixCode; break;
................................................................................
        let lKeyVal = [];
        for (let c of dChar.keys()) { lKeyVal.push([dChar.get(c), dCharOccur.get(c)]); }
        for (let sAff of dAff.keys()) { lKeyVal.push([dAff.get(sAff)+nChar, dAffOccur.get(sAff)]); }
        for (let sTag of dTag.keys()) { lKeyVal.push([dTag.get(sTag)+nChar+nAff, dTagOccur.get(sTag)]); }
        let dValOccur = new Map(lKeyVal);
        lKeyVal.length = 0; // clear the array

        this.sHeader = "/pyfsa/";
        this.sLang = sLang;
        this.nEntry = lWord.length;
        this.aPreviousEntry = [];
        oNodeCounter.reset();
        this.oRoot = new DawgNode();
        this.lUncheckedNodes = [];          // list of nodes that have not been checked for duplication.
        this.dMinimizedNodes = new Map();   // list of unique nodes that have been checked for duplication.
        this.nNode = 0;
................................................................................
            sByDic = this.oRoot.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            for (let oNode of this.dMinimizedNodes.values()) {
                sByDic += oNode.convToBytes1(this.nBytesArc, this.nBytesNodeAddress);
            }
        }
        let oJSON = {
            "sName": this.sName,
            "nVersion": nMethod,
            "sHeader": this.sHeader + nMethod + "/",
            "lArcVal": this.lArcVal,
            "nArcVal": this.nArcVal,
            "byDic": sByDic,
            "sLang": this.sLang,
            "nChar": this.nChar,
            "nBytesArc": this.nBytesArc,
            "nBytesNodeAddress": this.nBytesNodeAddress,