Grammalecte  Check-in [a1b165e276]

Overview
Comment:[graphspell][core] tokenizer: rename ELPFX tokens to WORD_ELIDED
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | graphspell | rg
Files: files | file ages | folders
SHA3-256: a1b165e27617202e63311b572053dbd71ca22c536d2e1186629ae12afa3f57a2
User & Date: olr on 2018-06-28 07:53:20
Original Comment: [graphspell][core] rename ELPFX tokens to WORD_ELIDED
Other Links: branch diff | manifest | tags
Context
2018-06-28
08:00
[graphspell] tokenizer: rename ORDINAL tokens to WORD_ORDINAL check-in: 20dbc28ded user: olr tags: graphspell, rg
07:53
[graphspell][core] tokenizer: rename ELPFX tokens to WORD_ELIDED check-in: a1b165e276 user: olr tags: core, graphspell, rg
2018-06-27
23:39
[build][fix] check regexes: memorize checked regexes check-in: 74d9c8e099 user: olr tags: build, rg
Changes

Modified gc_lang/fr/modules-js/lexicographe.js from [823f277d47] to [6f858d6849].

83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
...
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
...
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
    [':O2', [" 2ᵉ pers.,", "Pronom : 2ᵉ personne"]],
    [':O3', [" 3ᵉ pers.,", "Pronom : 3ᵉ personne"]],
    [':C', [" conjonction,", "Conjonction"]],
    [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]],
    [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]],
    [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]],
    [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]],
    
    [':Ñ', [" locution nominale (él.),", "Locution nominale (élément)"]],
    [':Â', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]],
    [':Ṽ', [" locution verbale (él.),", "Locution verbale (élément)"]],
    [':Ŵ', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]],
    [':Ŕ', [" locution prépositive (él.),", "Locution prépositive (élément)"]],
    [':Ĵ', [" locution interjective (él.),", "Locution interjective (élément)"]],

................................................................................
                case 'LINK':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue.slice(0, 40) + "…",
                        aLabel: ["hyperlien"]
                    };
                    break;
                case 'ELPFX':
                    let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase();
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: [_dElidedPrefix.gl_get(sTemp, "préfixe élidé inconnu")]
                    };
                    break;
................................................................................
        let aTokenList = this.getListOfTokens(sText.replace("'", "’").trim(), false);
        let iKey = 0;
        let aElem = [];
        do {
            let oToken = aTokenList[iKey];
            let sMorphLoc = '';
            let aTokenTempList = [oToken];
            if (oToken.sType == "WORD" || oToken.sType == "ELPFX"){
                let iKeyTree = iKey + 1;
                let oLocNode = this.oLocGraph[oToken.sValue.toLowerCase()];
                while (oLocNode) {
                    let oTokenNext = aTokenList[iKeyTree];
                    iKeyTree++;
                    if (oTokenNext) {
                        oLocNode = oLocNode[oTokenNext.sValue.toLowerCase()];







|







 







|







 







|







83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
...
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
...
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
    [':O2', [" 2ᵉ pers.,", "Pronom : 2ᵉ personne"]],
    [':O3', [" 3ᵉ pers.,", "Pronom : 3ᵉ personne"]],
    [':C', [" conjonction,", "Conjonction"]],
    [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]],
    [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]],
    [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]],
    [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]],

    [':Ñ', [" locution nominale (él.),", "Locution nominale (élément)"]],
    [':Â', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]],
    [':Ṽ', [" locution verbale (él.),", "Locution verbale (élément)"]],
    [':Ŵ', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]],
    [':Ŕ', [" locution prépositive (él.),", "Locution prépositive (élément)"]],
    [':Ĵ', [" locution interjective (él.),", "Locution interjective (élément)"]],

................................................................................
                case 'LINK':
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue.slice(0, 40) + "…",
                        aLabel: ["hyperlien"]
                    };
                    break;
                case 'WORD_ELIDED':
                    let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase();
                    return {
                        sType: oToken.sType,
                        sValue: oToken.sValue,
                        aLabel: [_dElidedPrefix.gl_get(sTemp, "préfixe élidé inconnu")]
                    };
                    break;
................................................................................
        let aTokenList = this.getListOfTokens(sText.replace("'", "’").trim(), false);
        let iKey = 0;
        let aElem = [];
        do {
            let oToken = aTokenList[iKey];
            let sMorphLoc = '';
            let aTokenTempList = [oToken];
            if (oToken.sType == "WORD" || oToken.sType == "WORD_ELIDED"){
                let iKeyTree = iKey + 1;
                let oLocNode = this.oLocGraph[oToken.sValue.toLowerCase()];
                while (oLocNode) {
                    let oTokenNext = aTokenList[iKeyTree];
                    iKeyTree++;
                    if (oTokenNext) {
                        oLocNode = oLocNode[oTokenNext.sValue.toLowerCase()];

Modified gc_lang/fr/webext/content_scripts/panel_lxg.css from [60aef30035] to [83fe0f37d1].

86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
}
div.grammalecte_lxg_token_LOC {
    background-color: hsla(150, 50%, 30%, 1);
}
div.grammalecte_lxg_token_WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
div.grammalecte_lxg_token_ELPFX {
    background-color: hsla(150, 30%, 50%, 1);
}
div.grammalecte_lxg_token_UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
div.grammalecte_lxg_token_NUM {
    background-color: hsla(180, 50%, 50%, 1);







|







86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
}
div.grammalecte_lxg_token_LOC {
    background-color: hsla(150, 50%, 30%, 1);
}
div.grammalecte_lxg_token_WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
div.grammalecte_lxg_token_WORD_ELIDED {
    background-color: hsla(150, 30%, 50%, 1);
}
div.grammalecte_lxg_token_UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
div.grammalecte_lxg_token_NUM {
    background-color: hsla(180, 50%, 50%, 1);

Modified gc_lang/fr/xpi/data/lxg_panel.css from [3d666aa76c] to [0f0ad23b15].

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
    padding: 2px 5px;
    border-radius: 2px;
    text-decoration: none;
}
#wordlist b.WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
#wordlist b.ELPFX {
    background-color: hsla(150, 30%, 50%, 1);
}
#wordlist b.UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
#wordlist b.NUM {
    background-color: hsla(180, 50%, 50%, 1);







|







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
    padding: 2px 5px;
    border-radius: 2px;
    text-decoration: none;
}
#wordlist b.WORD {
    background-color: hsla(150, 50%, 50%, 1);
}
#wordlist b.WORD_ELIDED {
    background-color: hsla(150, 30%, 50%, 1);
}
#wordlist b.UNKNOWN {
    background-color: hsla(0, 50%, 50%, 1);
}
#wordlist b.NUM {
    background-color: hsla(180, 50%, 50%, 1);

Modified graphspell-js/tokenizer.js from [5f94dc04ea] to [8dd855b1b3].

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
            [/^[,.;:!?…«»“”‘’"(){}\[\]·–—]/, 'SEPARATOR'],
            [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'ACRONYM'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b/, 'ORDINAL'],
            [/^-?\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ]
};







|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
            [/^[,.;:!?…«»“”‘’"(){}\[\]·–—]/, 'SEPARATOR'],
            [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'ACRONYM'],
            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_.\/?&!%=+*"'@$#-]+/, 'LINK'],
            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st_-]+/, 'TAG'],
            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+ *>/, 'HTML'],
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b/, 'ORDINAL'],
            [/^-?\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+)*/, 'WORD']
        ]
};

Modified graphspell/tokenizer.py from [7c766445e1] to [8cf6a6bb2e].

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
            r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
            r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—])',
            r'(?P<ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
            r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
            r'(?P<HASHTAG>[#@][\w-]+)',
            r'(?P<HTML><\w+.*?>|</\w+ *>)',
            r'(?P<PSEUDOHTML>\[/?\w+\])',
            r"(?P<ELPFX>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
            r'(?P<ORDINAL>\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b)',
            r'(?P<HOUR>\d\d?h\d\d\b)',
            r'(?P<NUM>-?\d+(?:[.,]\d+|))',
            r'(?P<SIGN>[%‰+=*/<>⩾⩽-])',
            r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
        )
}







|







27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
            r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
            r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—])',
            r'(?P<ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
            r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
            r'(?P<HASHTAG>[#@][\w-]+)',
            r'(?P<HTML><\w+.*?>|</\w+ *>)',
            r'(?P<PSEUDOHTML>\[/?\w+\])',
            r"(?P<WORD_ELIDED>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
            r'(?P<ORDINAL>\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b)',
            r'(?P<HOUR>\d\d?h\d\d\b)',
            r'(?P<NUM>-?\d+(?:[.,]\d+|))',
            r'(?P<SIGN>[%‰+=*/<>⩾⩽-])',
            r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
        )
}