Grammalecte: Check-in [709b969c75]

Overview
Comment: [build][bug] graph builder: action identifier must be more specific to avoid rules confusion
SHA3-256: 709b969c75c92ea49db6a52b945b473b0744b06ccd95a50b5a0ae96c67acd5fb
User & Date: olr on 2018-08-11 14:00:17
Context
2018-08-11
19:22  [fr] conversion: regex rules -> graph rules  (check-in: e620deee31, user: olr, tags: fr, rg)
14:00  [build][bug] graph builder: action identifier must be more specific to avoid rules confusion  (check-in: 709b969c75, user: olr, tags: build, rg)
10:50  [build] graph builder: function creator  (check-in: a8d90dbabb, user: olr, tags: build, rg)
Changes

Modified compile_rules_graph.py from [f91ed363c6] to [cb7c6efd58].
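The core of the fix is the action identifier built in the rule parser (see the second hunk below): it now embeds the rule's source line number, so two actions that previously produced the same identifier no longer overwrite each other in dACTIONS. A minimal sketch of the collision, with hypothetical values (the rule name and line number below are illustrative, not taken from the grammar files):

    # Old scheme: rule name + action block + action index + token count.
    # Two rules that share all four components collide on the same key.
    sRuleName, iActionBlock, iAction, nToken = "gn_accord", 1, 0, 3
    sOldId = sRuleName + "__b" + str(iActionBlock) + "_a" + str(iAction) + "_" + str(nToken)
    # -> "gn_accord__b1_a0_3" for both rules: the second one silently
    #    replaces the first in dACTIONS ("rules confusion")

    # New scheme: the source line is inserted after the block number,
    # keeping identifiers distinct for rules defined on different lines.
    iLine = 12345   # hypothetical source line of the rule
    sNewId = sRuleName + "__b" + str(iActionBlock) + "_l" + str(iLine) + "_a" + str(iAction) + "_" + str(nToken)
    # -> "gn_accord__b1_l12345_a0_3"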

    14     14   dFUNCTIONS = {}
    15     15   dFUNCNAME = {}
    16     16   
    17     17   
    18     18   def createFunction (sType, sActionId, sCode, bStartWithEqual=False):
    19     19       "create a function (stored in dFUNCTIONS) and return function name"
    20     20       sCode = prepareFunction(sCode)
    21         -    if sActionId not in dFUNCNAME:
    22         -        dFUNCNAME[sActionId] = {}
    23         -    if sCode not in dFUNCNAME[sActionId]:
    24         -        dFUNCNAME[sActionId][sCode] = len(dFUNCNAME[sActionId])+1
    25         -    sFuncName = "_g_" + sType + "_" + sActionId + "_" + str(dFUNCNAME[sActionId][sCode])
           21  +    if sType not in dFUNCNAME:
           22  +        dFUNCNAME[sType] = {}
           23  +    if sCode not in dFUNCNAME[sType]:
           24  +        dFUNCNAME[sType][sCode] = len(dFUNCNAME[sType])+1
           25  +    sFuncName = "_g_" + sType + "_" + str(dFUNCNAME[sType][sCode])
    26     26       dFUNCTIONS[sFuncName] = sCode
    27     27       return sFuncName  if not bStartWithEqual  else "="+sFuncName
    28     28   
    29     29   
    30         -def prepareFunction (s):
           30  +def prepareFunction (sCode):
    31     31       "convert simple rule syntax to a string of Python code"
    32         -    if s[0:1] == "=":
    33         -        s = s[1:]
    34         -    s = s.replace("__also__", "bCondMemo")
    35         -    s = s.replace("__else__", "not bCondMemo")
    36         -    s = s.replace("sContext", "_sAppContext")
    37         -    s = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', s)
    38         -    s = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', s)
    39         -    s = re.sub(r"(select|exclude|define|define_from)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', s)
    40         -    s = re.sub(r"(select|exclude|define|define_from)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', s)
    41         -    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], dTags', s)
    42         -    s = re.sub(r"(tag_before|tag_after)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1], dTags', s)
    43         -    s = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', s)
    44         -    s = re.sub(r"space_after[(][\\]-(\d+)", 'g_space_between_tokens(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', s)
    45         -    s = re.sub(r"analyse_with_next[(][\\](\d+)", 'g_merged_analyse(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', s)
    46         -    s = re.sub(r"analyse_with_next[(][\\]-(\d+)", 'g_merged_analyse(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', s)
    47         -    s = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                       # next token
    48         -    s = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                       # previous token
    49         -    s = re.sub(r"(morph|analyse|value)\(>(\d+)", 'g_\\1(g_token(lToken, nLastToken+\\2)', s)          # next token
    50         -    s = re.sub(r"(morph|analyse|value)\(<(\d+)", 'g_\\1(g_token(lToken, nTokenOffset+1-\\2)', s)      # previous token
    51         -    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    52         -    s = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', s)          # before(s)
    53         -    s = re.sub(r"\bafter\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s)                 # after(s)
    54         -    s = re.sub(r"\bbefore0\(\s*", 'look(sSentence0[:lToken[1+nTokenOffset]["nStart"]], ', s)        # before0(s)
    55         -    s = re.sub(r"\bafter0\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', s)                # after0(s)
    56         -    s = re.sub(r"[\\](\d+)", 'lToken[nTokenOffset+\\1]["sValue"]', s)
    57         -    s = re.sub(r"[\\]-(\d+)", 'lToken[nLastToken-\\1+1]["sValue"]', s)
    58         -    return s
           32  +    if sCode[0:1] == "=":
           33  +        sCode = sCode[1:]
           34  +    sCode = sCode.replace("__also__", "bCondMemo")
           35  +    sCode = sCode.replace("__else__", "not bCondMemo")
           36  +    sCode = sCode.replace("sContext", "_sAppContext")
           37  +    sCode = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode)
           38  +    sCode = re.sub(r"(morph|morphVC|analyse|value|displayInfo)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode)
           39  +    sCode = re.sub(r"(select|exclude|define|define_from)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2]', sCode)
           40  +    sCode = re.sub(r"(select|exclude|define|define_from)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]', sCode)
           41  +    sCode = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], dTags', sCode)
           42  +    sCode = re.sub(r"(tag_before|tag_after)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1], dTags', sCode)
           43  +    sCode = re.sub(r"space_after[(][\\](\d+)", 'g_space_between_tokens(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', sCode)
           44  +    sCode = re.sub(r"space_after[(][\\]-(\d+)", 'g_space_between_tokens(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', sCode)
           45  +    sCode = re.sub(r"analyse_with_next[(][\\](\d+)", 'g_merged_analyse(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]', sCode)
           46  +    sCode = re.sub(r"analyse_with_next[(][\\]-(\d+)", 'g_merged_analyse(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]', sCode)
           47  +    sCode = re.sub(r"(morph|analyse|value)\(>1", 'g_\\1(lToken[nLastToken+1]', sCode)                       # next token
           48  +    sCode = re.sub(r"(morph|analyse|value)\(<1", 'g_\\1(lToken[nTokenOffset]', sCode)                       # previous token
           49  +    sCode = re.sub(r"(morph|analyse|value)\(>(\d+)", 'g_\\1(g_token(lToken, nLastToken+\\2)', sCode)        # next token
           50  +    sCode = re.sub(r"(morph|analyse|value)\(<(\d+)", 'g_\\1(g_token(lToken, nTokenOffset+1-\\2)', sCode)    # previous token
           51  +    sCode = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', sCode)
           52  +    sCode = re.sub(r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], ', sCode)          # before(sCode)
           53  +    sCode = re.sub(r"\bafter\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', sCode)                 # after(sCode)
           54  +    sCode = re.sub(r"\bbefore0\(\s*", 'look(sSentence0[:lToken[1+nTokenOffset]["nStart"]], ', sCode)        # before0(sCode)
           55  +    sCode = re.sub(r"\bafter0\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], ', sCode)                # after0(sCode)
           56  +    sCode = re.sub(r"[\\](\d+)", 'lToken[nTokenOffset+\\1]["sValue"]', sCode)
           57  +    sCode = re.sub(r"[\\]-(\d+)", 'lToken[nLastToken-\\1+1]["sValue"]', sCode)
           58  +    return sCode
    59     59   
    60     60   
    61     61   def genTokenLines (sTokenLine, dDef):
    62     62       "tokenize a string and return a list of lines of tokens"
    63     63       lToken = sTokenLine.split()
    64     64       lTokenLines = None
    65     65       for sToken in lToken:
................................................................................
   122    122       "generator: create rule as list"
   123    123       # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
   124    124       for lToken in genTokenLines(sTokenLine, dDef):
   125    125           # Calculate positions
   126    126           dPos = {}   # key: iGroup, value: iToken
   127    127           iGroup = 0
   128    128           #if iLine == 3971: # debug
   129         -        #    print(lToken)
           129  +        #    print(" ".join(lToken))
   130    130           for i, sToken in enumerate(lToken):
   131    131               if sToken.startswith("(") and sToken.endswith(")"):
   132    132                   lToken[i] = sToken[1:-1]
   133    133                   iGroup += 1
   134    134                   dPos[iGroup] = i + 1    # we add 1, for we count tokens from 1 to n (not from 0)
   135    135   
   136    136           # Parse actions
   137    137           for iAction, sAction in enumerate(sActions.split(" <<- ")):
   138    138               sAction = sAction.strip()
   139    139               if sAction:
   140         -                sActionId = sRuleName + "__b" + str(iActionBlock) + "_a" + str(iAction) + "_" + str(len(lToken))
          140  +                sActionId = sRuleName + "__b" + str(iActionBlock) + "_l" + str(iLine) + "_a" + str(iAction) + "_" + str(len(lToken))
   141    141                   aAction = createAction(sActionId, sAction, nPriority, dOptPriority, len(lToken), dPos)
   142    142                   if aAction:
   143    143                       dACTIONS[sActionId] = aAction
   144    144                       lResult = list(lToken)
   145    145                       lResult.extend(["##"+str(iLine), sActionId])
           146  +                    if iLine == 13341:  # temporary debug output for one specific rule line
          147  +                        print("  ".join(lToken))
          148  +                        print(sActionId, aAction)
   146    149                       yield lResult
   147    150                   else:
   148    151                       print(" # Error on action at line:", iLine)
   149    152                       print(sTokenLine, "\n", sActions)
   150    153   
   151    154   
   152    155   def changeReferenceToken (sText, dPos):
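The other half of the change is in createFunction: generated functions are now deduplicated per action type (sType) rather than per action identifier, so identical code compiled for different actions shares a single function, and the function name no longer embeds the (now longer) action identifier. A standalone sketch of the new naming logic, reimplemented here for illustration only:

    dFUNCNAME = {}

    def makeFuncName (sType, sCode):
        "one generated function per (type, code) pair, numbered from 1"
        if sType not in dFUNCNAME:
            dFUNCNAME[sType] = {}
        if sCode not in dFUNCNAME[sType]:
            dFUNCNAME[sType][sCode] = len(dFUNCNAME[sType]) + 1
        return "_g_" + sType + "_" + str(dFUNCNAME[sType][sCode])

    print(makeFuncName("cond", "bCondMemo"))        # _g_cond_1
    print(makeFuncName("cond", "bCondMemo"))        # _g_cond_1  (reused, not duplicated)
    print(makeFuncName("cond", "not bCondMemo"))    # _g_cond_2

Under the old keying by sActionId, the same condition requested by two different actions compiled to two distinct functions with the action identifier baked into their names; keying by sType shrinks the generated module and decouples function names from action identifiers.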