Grammalecte  Check-in [e40149ad94]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[core][graphspell][js] fix regex for \w substitution
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core | graphspell
Files: files | file ages | folders
SHA3-256:e40149ad940e2fc32f79fa4cdef99e3bd3c3363171df2f73fb228e0314ec4ad6
User & Date: olr 2019-05-22 07:59:29
Context
2019-05-22
08:24
[graphspell][js] tokenizer: tag SEPARATOR -> PUNC check-in: 75bf92c9c2 user: olr tags: graphspell, trunk
07:59
[core][graphspell][js] fix regex for \w substitution check-in: e40149ad94 user: olr tags: core, graphspell, trunk
07:11
[fr] écriture épicène: ajustements check-in: 02d41c9147 user: olr tags: fr, trunk
Changes

Changes to gc_core/js/lang_core/gc_engine.js.

   988    988   
   989    989   //////// functions to get text outside pattern scope
   990    990   
   991    991   // warning: check compile_rules.py to understand how it works
   992    992   
   993    993   function nextword (s, iStart, n) {
   994    994       // get the nth word of the input string or empty string
   995         -    let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬁ-ﬆᴀ-ᶿ%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬁ-ﬆᴀ-ᶿ%_-]+)", "ig");
          995  +    let z = new RegExp("^(?: +[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬀ-ﬆᴀ-ᶿ%_-]+){" + (n-1).toString() + "} +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬀ-ﬆᴀ-ᶿ%_-]+)", "ig");
   996    996       let m = z.exec(s.slice(iStart));
   997    997       if (!m) {
   998    998           return null;
   999    999       }
  1000   1000       return [iStart + z.lastIndex - m[1].length, m[1]];
  1001   1001   }
  1002   1002   
  1003   1003   function prevword (s, iEnd, n) {
  1004   1004       // get the (-)nth word of the input string or empty string
  1005         -    let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬁ-ﬆᴀ-ᶿ%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬁ-ﬆᴀ-ᶿ%_-]+ +){" + (n-1).toString() + "}$", "i");
         1005  +    let z = new RegExp("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬀ-ﬆᴀ-ᶿ%_-]+) +(?:[a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬀ-ﬆᴀ-ᶿ%_-]+ +){" + (n-1).toString() + "}$", "i");
  1006   1006       let m = z.exec(s.slice(0, iEnd));
  1007   1007       if (!m) {
  1008   1008           return null;
  1009   1009       }
  1010   1010       return [m.index, m[1]];
  1011   1011   }
  1012   1012   
  1013   1013   function nextword1 (s, iStart) {
  1014   1014       // get next word (optimization)
  1015         -    let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_-]*)", "ig");
         1015  +    let _zNextWord = new RegExp ("^ +([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬀ-ﬆᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬀ-ﬆᴀ-ᶿ_-]*)", "ig");
  1016   1016       let m = _zNextWord.exec(s.slice(iStart));
  1017   1017       if (!m) {
  1018   1018           return null;
  1019   1019       }
  1020   1020       return [iStart + _zNextWord.lastIndex - m[1].length, m[1]];
  1021   1021   }
  1022   1022   
  1023         -const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_-]*) +$", "i");
         1023  +const _zPrevWord = new RegExp ("([a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬀ-ﬆᴀ-ᶿ_][a-zà-öA-Zø-ÿÀ-Ö0-9Ø-ßĀ-ʯﬀ-ﬆᴀ-ᶿ_-]*) +$", "i");
  1024   1024   
  1025   1025   function prevword1 (s, iEnd) {
  1026   1026       // get previous word (optimization)
  1027   1027       let m = _zPrevWord.exec(s.slice(0, iEnd));
  1028   1028       if (!m) {
  1029   1029           return null;
  1030   1030       }

Changes to graphspell-js/tokenizer.js.

     9      9   
    10     10   
    11     11   const aTkzPatterns = {
    12     12       // All regexps must start with ^.
    13     13       "default":
    14     14           [
    15     15               [/^[   \t]+/, 'SPACE'],
    16         -            [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)*/, 'FOLDERUNIX'],
    17         -            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)*/, 'FOLDERWIN'],
           16  +            [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_.()-]+)*/, 'FOLDERUNIX'],
           17  +            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_.()-]+)*/, 'FOLDERWIN'],
    18     18               [/^[,.;:!?…«»“”‘’"(){}\[\]·–—¿¡]/, 'SEPARATOR'],
    19     19               [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'WORD_ACRONYM'],
    20         -            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.\/?&!%=+*"'@$#-]+/, 'LINK'],
    21         -            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+/, 'TAG'],
    22         -            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+ *>/, 'HTML'],
    23         -            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+\]/, 'PSEUDOHTML'],
           20  +            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_.\/?&!%=+*"'@$#-]+/, 'LINK'],
           21  +            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_-]+/, 'TAG'],
           22  +            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ]+ *>/, 'HTML'],
           23  +            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ]+\]/, 'PSEUDOHTML'],
    24     24               [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
    25     25               [/^\d\d?[h:]\d\d\b/, 'HOUR'],
    26     26               [/^\d+(?:[.,]\d+|)/, 'NUM'],
    27     27               [/^[&%‰€$+±=*/<>⩾⩽#|×¥£§¢¬÷@-]/, 'SIGN'],
    28         -            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿ_]+)*/, 'WORD']
           28  +            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆᴀ-ᶿ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆᴀ-ᶿ_]+)*/, 'WORD']
    29     29           ],
    30     30       "fr":
    31     31           [
    32     32               [/^[   \t]+/, 'SPACE'],
    33         -            [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)*/, 'FOLDERUNIX'],
    34         -            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.()-]+)*/, 'FOLDERWIN'],
           33  +            [/^\/(?:~|bin|boot|dev|etc|home|lib|mnt|opt|root|sbin|tmp|usr|var|Bureau|Documents|Images|Musique|Public|Téléchargements|Vidéos)(?:\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_.()-]+)*/, 'FOLDERUNIX'],
           34  +            [/^[a-zA-Z]:\\(?:Program Files(?: \(x86\)|)|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ.()]+)(?:\\[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_.()-]+)*/, 'FOLDERWIN'],
    35     35               [/^[,.;:!?…«»“”‘’"(){}\[\]·–—¿¡]/, 'SEPARATOR'],
    36     36               [/^[A-Z][.][A-Z][.](?:[A-Z][.])*/, 'WORD_ACRONYM'],
    37         -            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_.\/?&!%=+*"'@$#-]+/, 'LINK'],
    38         -            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ_-]+/, 'TAG'],
    39         -            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+ *>/, 'HTML'],
    40         -            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆ]+\]/, 'PSEUDOHTML'],
           37  +            [/^(?:https?:\/\/|www[.]|[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_-]+[@.][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_-]{2,}[@.])[a-zA-Z0-9][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_.\/?&!%=+*"'@$#-]+/, 'LINK'],
           38  +            [/^[#@][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ_-]+/, 'TAG'],
           39  +            [/^<[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ]+.*?>|<\/[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ]+ *>/, 'HTML'],
           40  +            [/^\[\/?[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆ]+\]/, 'PSEUDOHTML'],
    41     41               [/^&\w+;(?:\w+;|)/, 'HTMLENTITY'],
    42     42               [/^(?:l|d|n|m|t|s|j|c|ç|lorsqu|puisqu|jusqu|quoiqu|qu)['’`]/i, 'WORD_ELIDED'],
    43     43               [/^\d\d?[h:]\d\d\b/, 'HOUR'],
    44     44               [/^\d+(?:ers?\b|nds?\b|es?\b|des?\b|ièmes?\b|èmes?\b|emes?\b|ᵉʳˢ?|ⁿᵈˢ?|ᵉˢ?|ᵈᵉˢ?)/, 'WORD_ORDINAL'],
    45     45               [/^\d+(?:[.,]\d+|)/, 'NUM'],
    46     46               [/^[&%‰€$+±=*/<>⩾⩽#|×¥£§¢¬÷@-]/, 'SIGN'],
    47         -            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿᵉʳˢⁿᵈ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬁ-ﬆᴀ-ᶿᵉʳˢⁿᵈ_]+)*/, 'WORD']
           47  +            [/^[a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆᴀ-ᶿᵉʳˢⁿᵈ_]+(?:[’'`-][a-zA-Zà-öÀ-Ö0-9ø-ÿØ-ßĀ-ʯﬀ-ﬆᴀ-ᶿᵉʳˢⁿᵈ_]+)*/, 'WORD']
    48     48           ]
    49     49   };
    50     50   
    51     51   
    52     52   class Tokenizer {
    53     53   
    54     54       constructor (sLang) {

Changes to js_extension/string.js.

    17     17           }
    18     18           return nOccur;
    19     19       };
    20     20       String.prototype.gl_isDigit = function () {
    21     21           return (this.search(/^[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]+$/) !== -1);
    22     22       };
    23     23       String.prototype.gl_isAlpha = function () {
    24         -        return (this.search(/^[a-zA-Zà-öÀ-Öø-ÿØ-ßĀ-ʯ]+$/) !== -1);
           24  +        return (this.search(/^[a-zà-öA-Zø-ÿÀ-ÖØ-ßĀ-ʯﬀ-ﬆᴀ-ᶿ]+$/) !== -1);
    25     25       };
    26     26       String.prototype.gl_isLowerCase = function () {
    27         -        return (this.search(/^[a-zà-öø-ÿ0-9-]+$/) !== -1);
           27  +        return (this.search(/^[a-zà-öø-ÿﬀ-ﬆ0-9-]+$/) !== -1);
    28     28       };
    29     29       String.prototype.gl_isUpperCase = function () {
    30     30           return (this.search(/^[A-ZÀ-ÖØ-ߌ0-9-]+$/) !== -1);
    31     31       };
    32     32       String.prototype.gl_isTitle = function () {
    33         -        return (this.search(/^[A-ZÀ-ÖØ-ߌ][a-zà-öø-ÿ'’-]+$/) !== -1);
           33  +        return (this.search(/^[A-ZÀ-ÖØ-ߌ][a-zà-öø-ÿﬀ-ﬆ'’-]+$/) !== -1);
    34     34       };
    35     35       String.prototype.gl_toCapitalize = function () {
    36     36           return this.slice(0,1).toUpperCase() + this.slice(1).toLowerCase();
    37     37       };
    38     38       String.prototype.gl_expand = function (oMatch) {
    39     39           let sNew = this;
    40     40           for (let i = 0; i < oMatch.length ; i++) {