Grammalecte  Check-in [ca8457058e]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[build] conversion to JS update
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | build | rg
Files: files | file ages | folders
SHA3-256:ca8457058ef6d52215f754befe3cff77d717ca79ecc13cc538301e4a9241259a
User & Date: olr 2018-09-17 09:32:14
Context
2018-09-17
09:34
[core][js] gc engine: update \w replacements, + remove useless code check-in: 1173853ba9 user: olr tags: core, rg
09:32
[build] conversion to JS update check-in: ca8457058e user: olr tags: build, rg
09:00
[graphspell] tokenizer: add chars to \w replacement (JS still sucks) check-in: c185b3fc04 user: olr tags: graphspell, rg
Changes

Changes to compile_rules_js_convert.py.

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
..
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
    sCode = re.sub(r'.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode)
    # regex
    #sCode = re.sub('re.search\\("([^"]+)", *(m.group\\(\\d\\))\\)', "(\\2.search(/\\1/) >= 0)", sCode)
    #sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode)
    #sCode = re.sub('(look\\(sSentence0?[][.a-z0-9:()"+-]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
    #sCode = re.sub('(look\\(sSentence0?[][.a-z0-9:()"+-]*), "([^"]+)"', "\\1, /\\2/", sCode)
    sCode = re.sub('m\\.group\\((\\d+)\\) +in +(a[a-zA-Z]+)', "\\2.has(m[\\1])", sCode)
    sCode = sCode.replace("(?<!-)", "")  # todo
    # slices
    sCode = sCode.replace("[:m.start()]", ".slice(0,m.index)")
    sCode = sCode.replace("[m.end():]", ".slice(m.end[0])")
    sCode = sCode.replace("[m.start():m.end()]", ".slice(m.index, m.end[0])")
    sCode = sCode.replace('[lToken[nLastToken]["nEnd"]:]', '.slice(lToken[nLastToken]["nEnd"])')
    sCode = sCode.replace('[:lToken[1+nTokenOffset]["nStart"]]', '.slice(0,lToken[1+nTokenOffset]["nStart"])')
    sCode = re.sub("\\[(-?\\d+):(-?\\d+)\\]", ".slice(\\1,\\2)", sCode)
................................................................................
    sCode = sCode.replace(".start()", ".index")
    sCode = sCode.replace("m.group()", "m[0]")
    sCode = re.sub("\\.start\\((\\d+)\\)", ".start[\\1]", sCode)
    sCode = re.sub("m\\.group\\((\\d+)\\)", "m[\\1]", sCode)
    # tuples -> lists
    sCode = re.sub("\\((m\\.start\\[\\d+\\], m\\[\\d+\\])\\)", "[\\1]", sCode)
    # regex
    sCode = sCode.replace(r"\w[\w-]+", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st][a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+")
    sCode = sCode.replace(r"/\w/", "/[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st]/")
    sCode = sCode.replace(r"[\w-]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]")
    sCode = sCode.replace(r"[\w,]", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st,]")
    return sCode


def regex2js (sRegex, sWORDLIMITLEFT):
    "converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions"
    #   Latin letters: http://unicode-table.com/fr/
    #   0-9  and  _







|







 







<
<
|
|







47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
..
66
67
68
69
70
71
72


73
74
75
76
77
78
79
80
81
    sCode = re.sub(r'.replace\("([^"]+)" ?,', ".replace(/\\1/g,", sCode)
    # regex
    #sCode = re.sub('re.search\\("([^"]+)", *(m.group\\(\\d\\))\\)', "(\\2.search(/\\1/) >= 0)", sCode)
    #sCode = re.sub(".search\\(/\\(\\?i\\)([^/]+)/\\) >= 0\\)", ".search(/\\1/i) >= 0)", sCode)
    #sCode = re.sub('(look\\(sSentence0?[][.a-z0-9:()"+-]*), "\\(\\?i\\)([^"]+)"', "\\1, /\\2/i", sCode)
    #sCode = re.sub('(look\\(sSentence0?[][.a-z0-9:()"+-]*), "([^"]+)"', "\\1, /\\2/", sCode)
    sCode = re.sub('m\\.group\\((\\d+)\\) +in +(a[a-zA-Z]+)', "\\2.has(m[\\1])", sCode)
    sCode = re.sub('(lToken\\S+) +in +(a[a-zA-Z]+)', "\\2.has(\\1)", sCode)
    # slices
    sCode = sCode.replace("[:m.start()]", ".slice(0,m.index)")
    sCode = sCode.replace("[m.end():]", ".slice(m.end[0])")
    sCode = sCode.replace("[m.start():m.end()]", ".slice(m.index, m.end[0])")
    sCode = sCode.replace('[lToken[nLastToken]["nEnd"]:]', '.slice(lToken[nLastToken]["nEnd"])')
    sCode = sCode.replace('[:lToken[1+nTokenOffset]["nStart"]]', '.slice(0,lToken[1+nTokenOffset]["nStart"])')
    sCode = re.sub("\\[(-?\\d+):(-?\\d+)\\]", ".slice(\\1,\\2)", sCode)
................................................................................
    sCode = sCode.replace(".start()", ".index")
    sCode = sCode.replace("m.group()", "m[0]")
    sCode = re.sub("\\.start\\((\\d+)\\)", ".start[\\1]", sCode)
    sCode = re.sub("m\\.group\\((\\d+)\\)", "m[\\1]", sCode)
    # tuples -> lists
    sCode = re.sub("\\((m\\.start\\[\\d+\\], m\\[\\d+\\])\\)", "[\\1]", sCode)
    # regex


    sCode = sCode.replace(r"[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿ")
    sCode = sCode.replace(r"\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-stᴀ-ᶿ]")
    return sCode


def regex2js (sRegex, sWORDLIMITLEFT):
    "converts Python regex to JS regex and returns JS regex and list of negative lookbefore assertions"
    #   Latin letters: http://unicode-table.com/fr/
    #   0-9  and  _