Grammalecte  Check-in [450e1ec422]

Overview
Comment:[build] prepareFunctions: use token value by default
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | build | rg
Files: files | file ages | folders
SHA3-256: 450e1ec42269d6ed339d81677db5ff3009f20a93b0bab6c8b331fab2129674b5
User & Date: olr on 2018-07-15 09:43:30
Other Links: branch diff | manifest | tags
Context
2018-07-15
11:33
[build] fix rules generation when first token was multiple and a selected group check-in: 9d39408049 user: olr tags: build, rg
09:43
[build] prepareFunctions: use token value by default check-in: 450e1ec422 user: olr tags: build, rg
2018-07-14
14:01
[fr] conversion: regex rules -> graph rules check-in: 6630bf7012 user: olr tags: rg
Changes

Modified compile_rules_graph.py from [bb72ae50c9] to [58681b66b6].

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
...
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
...
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
import darg


dACTIONS = {}
dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define|define_from)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"analyse_with_next[(][\\](\d+)", 'g_merged_analyse(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                       # next token
    s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                       # previous token
    s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
    s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s)          # before(s)
    s = re.sub(r"\bafter\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s)                 # after(s)
    s = re.sub(r"\bbefore0\(\s*", 'look(sSentence0[:lToken[1+nTokenOffset]["nStart"]], ', s)        # before0(s)
    s = re.sub(r"\bafter0\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s)                # after0(s)
    if bTokenValue:
        # token values are used as parameter
        s = re.sub(r"[\\](\d+)", 'lToken[\\1+nTokenOffset]["sValue"]', s)
    else:
        # tokens used as parameter
        s = re.sub(r"[\\](\d+)", 'lToken[\\1+nTokenOffset]', s)
    return s


def genTokenLines (sTokenLine, dDef):
    "tokenize a string and return a list of lines of tokens"
    lToken = sTokenLine.split()
    lTokenLines = None
................................................................................
            sURL = ""
            mURL = re.search("[|] *(https?://.*)", sMsg)
            if mURL:
                sURL = mURL.group(1).strip()
                sMsg = sMsg[:mURL.start(0)].strip()
            checkTokenNumbers(sMsg, sActionId, nToken)
            if sMsg[0:1] == "=":
                sMsg = prepareFunction(sMsg[1:], True)
                dFUNCTIONS["g_m_"+sActionId] = sMsg
                sMsg = "=g_m_"+sActionId
            else:
                checkIfThereIsCode(sMsg, sActionId)

    # checking consistancy
    checkTokenNumbers(sAction, sActionId, nToken)
................................................................................

    if sAction[0:1] != "=" and cAction != "=":
        checkIfThereIsCode(sAction, sActionId)

    if cAction == "-":
        ## error detected --> suggestion
        if sAction[0:1] == "=":
            sAction = prepareFunction(sAction, True)
            dFUNCTIONS["_g_s_"+sActionId] = sAction[1:]
            sAction = "=_g_s_"+sActionId
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# Error in action at line " + sActionId + ":  The message is empty.")
        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction, nPriority, sMsg, sURL]
    elif cAction == "~":
        ## text processor
        if sAction[0:1] == "=":
            sAction = prepareFunction(sAction, True)
            dFUNCTIONS["_g_p_"+sActionId] = sAction[1:]
            sAction = "=_g_p_"+sActionId
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
    elif cAction == "/":
        ## tags







|








|


|
|





<
<
|
<
<
<







 







|







 







|










|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35


36



37
38
39
40
41
42
43
...
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
...
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
import darg


dACTIONS = {}
dFUNCTIONS = {}


def prepareFunction (s):
    "convert simple rule syntax to a string of Python code"
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define|define_from)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    s = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    s = re.sub(r"analyse_with_next[(][\\](\d+)", 'g_merged_analyse(lToken[\\1+nTokenOffset], lToken[\\1+nTokenOffset+1]', s)
    #s = re.sub(r"(switchGender|has(?:(?:Mas|Fem)Form)|Simil)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                       # next token
    s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                       # previous token
    #s = re.sub(r"[\\](\d+)\.is(upper|lower|title)\(\)", 'lToken[\\1+nTokenOffset]["sValue"].is\\2()', s)
    #s = re.sub(r"[\\](\d+)\.(startswith|endswith)\(", 'lToken[\\1+nTokenOffset]["sValue"].\\2(', s)
    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s)          # before(s)
    s = re.sub(r"\bafter\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s)                 # after(s)
    s = re.sub(r"\bbefore0\(\s*", 'look(sSentence0[:lToken[1+nTokenOffset]["nStart"]], ', s)        # before0(s)
    s = re.sub(r"\bafter0\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s)                # after0(s)


    s = re.sub(r"[\\](\d+)", 'lToken[\\1+nTokenOffset]["sValue"]', s)



    return s


def genTokenLines (sTokenLine, dDef):
    "tokenize a string and return a list of lines of tokens"
    lToken = sTokenLine.split()
    lTokenLines = None
................................................................................
            sURL = ""
            mURL = re.search("[|] *(https?://.*)", sMsg)
            if mURL:
                sURL = mURL.group(1).strip()
                sMsg = sMsg[:mURL.start(0)].strip()
            checkTokenNumbers(sMsg, sActionId, nToken)
            if sMsg[0:1] == "=":
                sMsg = prepareFunction(sMsg[1:])
                dFUNCTIONS["g_m_"+sActionId] = sMsg
                sMsg = "=g_m_"+sActionId
            else:
                checkIfThereIsCode(sMsg, sActionId)

    # checking consistancy
    checkTokenNumbers(sAction, sActionId, nToken)
................................................................................

    if sAction[0:1] != "=" and cAction != "=":
        checkIfThereIsCode(sAction, sActionId)

    if cAction == "-":
        ## error detected --> suggestion
        if sAction[0:1] == "=":
            sAction = prepareFunction(sAction)
            dFUNCTIONS["_g_s_"+sActionId] = sAction[1:]
            sAction = "=_g_s_"+sActionId
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        if not sMsg:
            print("# Error in action at line " + sActionId + ":  The message is empty.")
        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction, nPriority, sMsg, sURL]
    elif cAction == "~":
        ## text processor
        if sAction[0:1] == "=":
            sAction = prepareFunction(sAction)
            dFUNCTIONS["_g_p_"+sActionId] = sAction[1:]
            sAction = "=_g_p_"+sActionId
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
    elif cAction == "/":
        ## tags