Grammalecte  Check-in [7f03f6c55a]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[graphspell][js] fucking \w substitution again
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256:7f03f6c55ae97cbb95e76db4806b364096aaed2d44978aee23b60b22654f2900
User & Date: olr 2018-12-26 18:25:37
Context
2018-12-27
11:27
[fr] nr: compagnons d’armes, nr: confusion V1infi/adjectif check-in: b4b35f49c5 user: olr tags: fr, trunk
2018-12-26
18:25
[graphspell][js] fucking \w substitution again check-in: 7f03f6c55a user: olr tags: graphspell, trunk
18:10
[graphspell][js] fucking \w substitution again check-in: 78254a6629 user: olr tags: graphspell, trunk
Changes

Changes to graphspell-js/tokenizer.js.

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:ers?\b|nds?\b|es?\b|des?\b|ièmes?\b|èmes?\b|emes?\b|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)/, 'WORD_ORDINAL'],
            [/^\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿˢᵈ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿˢᵈ_]+)*/, 'WORD']
        ]
};


class Tokenizer {

    constructor (sLang) {







|







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-st]+\]/, 'PSEUDOHTML'],
            [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
            [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'],
            [/^\d\d?[hm]\d\d\b/, 'HOUR'],
            [/^\d+(?:ers?\b|nds?\b|es?\b|des?\b|ièmes?\b|èmes?\b|emes?\b|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)/, 'WORD_ORDINAL'],
            [/^\d+(?:[.,]\d+|)/, 'NUM'],
            [/^[%‰+=*/<>⩾⩽-]/, 'SIGN'],
            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿᵉʳˢⁿᵈ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿᵉʳˢⁿᵈ_]+)*/, 'WORD']
        ]
};


class Tokenizer {

    constructor (sLang) {