Grammalecte  Check-in [72f63bddd2]

Overview
Comment:[graphspell][js] tokenizer: update \w replacement again
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | graphspell | rg
Files: files | file ages | folders
SHA3-256: 72f63bddd22d08a214557d990966fd943703d9fd09e4cd9aa2035b219b715633
User & Date: olr on 2018-09-17 12:06:16
Other Links: branch diff | manifest | tags
Context
2018-09-17
12:07
[fr][bug] mauvais token sélectionné check-in: d877acf9d9 user: olr tags: fr, rg
12:06
[graphspell][js] tokenizer: update \w replacement again check-in: 72f63bddd2 user: olr tags: graphspell, rg
11:32
[fr][bugs] remove useless parameters check-in: 8b769e5c52 user: olr tags: fr, rg
Changes

Modified graphspell-js/tokenizer.js from [d579c92281] to [88bacac87d].

    19     19               [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+/, 'TAG'],
    20     20               [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+ *>/, 'HTML'],
    21     21               [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+\]/, 'PSEUDOHTML'],
    22     22               [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
    23     23               [/^\d\d?h\d\d\b/, 'HOUR'],
    24     24               [/^\d+(?:[.,]\d+|)/, 'NUM'],
    25     25               [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
    26         -            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ]+)*/, 'WORD']
           26  +            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_]+)*/, 'WORD']
    27     27           ],
    28     28       "fr":
    29     29           [
    30     30               [/^[   \t]+/, 'SPACE'],
    31     31               [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)*/, 'FOLDERUNIX'],
    32     32               [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)*/, 'FOLDERWIN'],
    33     33               [/^[,.;:!?…«»“”‘’"(){}\[\]·–—]/, 'SEPARATOR'],
................................................................................
    38     38               [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+\]/, 'PSEUDOHTML'],
    39     39               [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
    40     40               [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'],
    41     41               [/^\d\d?[hm]\d\d\b/, 'HOUR'],
    42     42               [/^\d+(?:ers?\b|nds?\b|es?\b|des?\b|ièmes?\b|èmes?\b|emes?\b|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)/, 'WORD_ORDINAL'],
    43     43               [/^\d+(?:[.,]\d+|)/, 'NUM'],
    44     44               [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
    45         -            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ]+)*/, 'WORD']
           45  +            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_]+)*/, 'WORD']
    46     46           ]
    47     47   };
    48     48   
    49     49   
    50     50   class Tokenizer {
    51     51   
    52     52       constructor (sLang) {