Grammalecte  Check-in [a1b165e276]

Overview
Comment: [graphspell][core] tokenizer: rename ELPFX tokens to WORD_ELIDED
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | graphspell | rg
Files: files | file ages | folders
SHA3-256: a1b165e27617202e63311b572053dbd71ca22c536d2e1186629ae12afa3f57a2
User & Date: olr on 2018-06-28 07:53:20
Original Comment: [graphspell][core] rename ELPFX tokens to WORD_ELIDED
Other Links: branch diff | manifest | tags
Context
2018-06-28
08:00
[graphspell] tokenizer: rename ORDINAL tokens to WORD_ORDINAL check-in: 20dbc28ded user: olr tags: graphspell, rg
07:53
[graphspell][core] tokenizer: rename ELPFX tokens to WORD_ELIDED check-in: a1b165e276 user: olr tags: core, graphspell, rg
2018-06-27
23:39
[build][fix] check regexes: memorize checked regexes check-in: 74d9c8e099 user: olr tags: build, rg
Changes

Modified gc_lang/fr/modules-js/lexicographe.js from [823f277d47] to [6f858d6849].

    83     83       [':O2', [" 2ᵉ pers.,", "Pronom : 2ᵉ personne"]],
    84     84       [':O3', [" 3ᵉ pers.,", "Pronom : 3ᵉ personne"]],
    85     85       [':C', [" conjonction,", "Conjonction"]],
    86     86       [':Ĉ', [" conjonction (él.),", "Conjonction (élément)"]],
    87     87       [':Cc', [" conjonction de coordination,", "Conjonction de coordination"]],
    88     88       [':Cs', [" conjonction de subordination,", "Conjonction de subordination"]],
    89     89       [':Ĉs', [" conjonction de subordination (él.),", "Conjonction de subordination (élément)"]],
    90         -    
           90  +
    91     91       [':Ñ', [" locution nominale (él.),", "Locution nominale (élément)"]],
    92     92       [':Â', [" locution adjectivale (él.),", "Locution adjectivale (élément)"]],
    93     93       [':Ṽ', [" locution verbale (él.),", "Locution verbale (élément)"]],
    94     94       [':Ŵ', [" locution adverbiale (él.),", "Locution adverbiale (élément)"]],
    95     95       [':Ŕ', [" locution prépositive (él.),", "Locution prépositive (élément)"]],
    96     96       [':Ĵ', [" locution interjective (él.),", "Locution interjective (élément)"]],
    97     97   
................................................................................
   261    261                   case 'LINK':
   262    262                       return {
   263    263                           sType: oToken.sType,
   264    264                           sValue: oToken.sValue.slice(0, 40) + "…",
   265    265                           aLabel: ["hyperlien"]
   266    266                       };
   267    267                       break;
   268         -                case 'ELPFX':
          268  +                case 'WORD_ELIDED':
   269    269                       let sTemp = oToken.sValue.replace("’", "").replace("'", "").replace("`", "").toLowerCase();
   270    270                       return {
   271    271                           sType: oToken.sType,
   272    272                           sValue: oToken.sValue,
   273    273                           aLabel: [_dElidedPrefix.gl_get(sTemp, "préfixe élidé inconnu")]
   274    274                       };
   275    275                       break;
................................................................................
   452    452           let aTokenList = this.getListOfTokens(sText.replace("'", "’").trim(), false);
   453    453           let iKey = 0;
   454    454           let aElem = [];
   455    455           do {
   456    456               let oToken = aTokenList[iKey];
   457    457               let sMorphLoc = '';
   458    458               let aTokenTempList = [oToken];
   459         -            if (oToken.sType == "WORD" || oToken.sType == "ELPFX"){
          459  +            if (oToken.sType == "WORD" || oToken.sType == "WORD_ELIDED"){
   460    460                   let iKeyTree = iKey + 1;
   461    461                   let oLocNode = this.oLocGraph[oToken.sValue.toLowerCase()];
   462    462                   while (oLocNode) {
   463    463                       let oTokenNext = aTokenList[iKeyTree];
   464    464                       iKeyTree++;
   465    465                       if (oTokenNext) {
   466    466                           oLocNode = oLocNode[oTokenNext.sValue.toLowerCase()];

Modified gc_lang/fr/webext/content_scripts/panel_lxg.css from [60aef30035] to [83fe0f37d1].

    86     86   }
    87     87   div.grammalecte_lxg_token_LOC {
    88     88       background-color: hsla(150, 50%, 30%, 1);
    89     89   }
    90     90   div.grammalecte_lxg_token_WORD {
    91     91       background-color: hsla(150, 50%, 50%, 1);
    92     92   }
    93         -div.grammalecte_lxg_token_ELPFX {
           93  +div.grammalecte_lxg_token_WORD_ELIDED {
    94     94       background-color: hsla(150, 30%, 50%, 1);
    95     95   }
    96     96   div.grammalecte_lxg_token_UNKNOWN {
    97     97       background-color: hsla(0, 50%, 50%, 1);
    98     98   }
    99     99   div.grammalecte_lxg_token_NUM {
   100    100       background-color: hsla(180, 50%, 50%, 1);

Modified gc_lang/fr/xpi/data/lxg_panel.css from [3d666aa76c] to [0f0ad23b15].

    54     54       padding: 2px 5px;
    55     55       border-radius: 2px;
    56     56       text-decoration: none;
    57     57   }
    58     58   #wordlist b.WORD {
    59     59       background-color: hsla(150, 50%, 50%, 1);
    60     60   }
    61         -#wordlist b.ELPFX {
           61  +#wordlist b.WORD_ELIDED {
    62     62       background-color: hsla(150, 30%, 50%, 1);
    63     63   }
    64     64   #wordlist b.UNKNOWN {
    65     65       background-color: hsla(0, 50%, 50%, 1);
    66     66   }
    67     67   #wordlist b.NUM {
    68     68       background-color: hsla(180, 50%, 50%, 1);

Modified graphspell-js/tokenizer.js from [5f94dc04ea] to [8dd855b1b3].

    38     38               [/^[,.;:!?…«»“”‘’"(){}\[\]·–—]/, 'SEPARATOR'],
    39     39               [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'ACRONYM'],
    40     40               [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.\/?&!%=+*"'@$#-]+/, 'LINK'],
    41     41               [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+/, 'TAG'],
    42     42               [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+ *>/, 'HTML'],
    43     43               [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+\]/, 'PSEUDOHTML'],
    44     44               [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
    45         -            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'ELPFX'],
           45  +            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'],
    46     46               [/^\d\d?[hm]\d\d\b/, 'HOUR'],
    47     47               [/^\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b/, 'ORDINAL'],
    48     48               [/^-?\d+(?:[.,]\d+|)/, 'NUM'],
    49     49               [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
    50     50               [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+)*/, 'WORD']
    51     51           ]
    52     52   };

Modified graphspell/tokenizer.py from [7c766445e1] to [8cf6a6bb2e].

    27     27               r'(?P<FOLDERWIN>[a-zA-Z]:\\(?:Program Files(?: [(]x86[)]|)|[\w.()]+)(?:\\[\w.()-]+)*)',
    28     28               r'(?P<PUNC>[][,.;:!?…«»“”‘’"(){}·–—])',
    29     29               r'(?P<ACRONYM>[A-Z][.][A-Z][.](?:[A-Z][.])*)',
    30     30               r'(?P<LINK>(?:https?://|www[.]|\w+[@.]\w\w+[@.])\w[\w./?&!%=+*"\'@$#-]+)',
    31     31               r'(?P<HASHTAG>[#@][\w-]+)',
    32     32               r'(?P<HTML><\w+.*?>|</\w+ *>)',
    33     33               r'(?P<PSEUDOHTML>\[/?\w+\])',
    34         -            r"(?P<ELPFX>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
           34  +            r"(?P<WORD_ELIDED>(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`])",
    35     35               r'(?P<ORDINAL>\d+(?:ers?|nds?|es?|des?|ièmes?|èmes?|emes?|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)\b)',
    36     36               r'(?P<HOUR>\d\d?h\d\d\b)',
    37     37               r'(?P<NUM>-?\d+(?:[.,]\d+|))',
    38     38               r'(?P<SIGN>[%‰+=*/<>⩾⩽-])',
    39     39               r"(?P<WORD>\w+(?:[’'`-]\w+)*)"
    40     40           )
    41     41   }