Grammalecte  Check-in [18191569f4]

Overview
Comment:[build] compile rules: code cleaning (pylint)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | build | rg
Files: files | file ages | folders
SHA3-256: 18191569f40606a692b0397f1b35893588a120aa9b77afca5e4da3f7e03d0cb5
User & Date: olr on 2018-06-24 17:51:48
Other Links: branch diff | manifest | tags
Context
2018-06-24
18:45
[build] compile rules: code clarification check-in: 4ff036a562 user: olr tags: build, rg
17:51
[build] compile rules: code cleaning (pylint) check-in: 18191569f4 user: olr tags: build, rg
16:45
[server] code cleaning (pylint) check-in: 9e6790402a user: olr tags: rg, server
Changes

Modified compile_rules.py from [2382a8be81] to [30d53476f8].

            1  +"""
            2  +Grammalecte: compile rules
            3  +"""
     1      4   
     2      5   import re
     3      6   import traceback
     4      7   import json
     5      8   
     6      9   import compile_rules_js_convert as jsconv
     7     10   import compile_rules_graph as crg
................................................................................
    16     19   dJSREGEXES = {}
    17     20   
    18     21   sWORDLIMITLEFT  = r"(?<![\w.,–-])"   # r"(?<![-.,—])\b"  seems slower
    19     22   sWORDLIMITRIGHT = r"(?![\w–-])"      # r"\b(?!-—)"       seems slower
    20     23   
    21     24   
    22     25   def prepareFunction (s):
           26  +    "convert simple rule syntax to a string of Python code"
    23     27       s = s.replace("__also__", "bCondMemo")
    24     28       s = s.replace("__else__", "not bCondMemo")
    25     29       s = re.sub(r"isStart *\(\)", 'before("^ *$|, *$")', s)
    26     30       s = re.sub(r"isRealStart *\(\)", 'before("^ *$")', s)
    27     31       s = re.sub(r"isStart0 *\(\)", 'before0("^ *$|, *$")', s)
    28     32       s = re.sub(r"isRealStart0 *\(\)", 'before0("^ *$")', s)
    29     33       s = re.sub(r"isEnd *\(\)", 'after("^ *$|^,")', s)
................................................................................
    96    100               nState = 4
    97    101           elif nState == 4:
    98    102               nState = 0
    99    103       return sUp
   100    104   
   101    105   
   102    106   def countGroupInRegex (sRegex):
          107  +    "returns the number of groups in <sRegex>"
   103    108       try:
   104    109           return re.compile(sRegex).groups
   105    110       except:
   106    111           traceback.print_exc()
   107    112           print(sRegex)
   108    113       return 0
   109    114   
................................................................................
   115    120   
   116    121       sLineId = str(nIdLine) + ("p" if bParagraph else "s")
   117    122       sRuleId = sLineId
   118    123   
   119    124       #### GRAPH CALL
   120    125       if s.startswith("@@@@"):
   121    126           if bParagraph:
   122         -            print("Error. Graph call can’t be made only after the first pass (sentence by sentence)")
          127  +            print("Error. Graph call can be made only after the first pass (sentence by sentence)")
   123    128               exit()
   124    129           return ["@@@@", s[4:], sLineId]
   125    130   
   126    131       #### OPTIONS
   127    132       sOption = False         # False or [a-z0-9]+ name
   128    133       nPriority = 4           # Default is 4, value must be between 0 and 9
   129    134       tGroups = None          # code for groups positioning (only useful for JavaScript)
................................................................................
   209    214           sRegex = sRegex.replace("(?i)", "")
   210    215           sRegex = uppercase(sRegex, sLang)
   211    216       else:
   212    217           print("# Unknown case mode [" + cCaseMode + "] at line " + sLineId)
   213    218   
   214    219       ## check regex
   215    220       try:
   216         -        z = re.compile(sRegex)
          221  +        re.compile(sRegex)
   217    222       except:
   218    223           print("# Regex error at line ", nIdLine)
   219    224           print(sRegex)
   220    225           traceback.print_exc()
   221    226           return None
   222    227       ## groups in non grouping parenthesis
   223         -    for x in re.finditer("\(\?:[^)]*\([[\w -]", sRegex):
          228  +    for x in re.finditer(r"\(\?:[^)]*\([[\w -]", sRegex):
   224    229           print("# Warning: groups inside non grouping parenthesis in regex at line " + sLineId)
   225    230   
   226    231       #### PARSE ACTIONS
   227    232       lActions = []
   228    233       nAction = 1
   229    234       for sAction in s.split(" <<- "):
   230    235           t = createAction(sRuleId + "_" + str(nAction), sAction, nGroup)
................................................................................
   235    240           return None
   236    241   
   237    242       return [sOption, sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, tGroups]
   238    243   
   239    244   
   240    245   def createAction (sIdAction, sAction, nGroup):
   241    246       "returns an action to perform as a tuple (condition, action type, action[, iGroup [, message, URL ]])"
   242         -    global lFUNCTIONS
   243         -
   244    247       m = re.search(r"([-~=>])(\d*|)>>", sAction)
   245    248       if not m:
   246    249           print("# No action at line " + sIdAction)
   247    250           return None
   248    251   
   249    252       #### CONDITION
   250    253       sCondition = sAction[:m.start()].strip()
   251    254       if sCondition:
   252    255           sCondition = prepareFunction(sCondition)
   253    256           lFUNCTIONS.append(("_c_"+sIdAction, sCondition))
   254         -        for x in re.finditer("[.](?:group|start|end)[(](\d+)[)]", sCondition):
          257  +        for x in re.finditer(r"[.](?:group|start|end)[(](\d+)[)]", sCondition):
   255    258               if int(x.group(1)) > nGroup:
   256    259                   print("# Error in groups in condition at line " + sIdAction + " ("+str(nGroup)+" groups only)")
   257    260           if ".match" in sCondition:
   258    261               print("# Error. JS compatibility. Don't use .match() in condition, use .search()")
   259    262           sCondition = "_c_"+sIdAction
   260    263       else:
   261    264           sCondition = None
................................................................................
   282    285               mURL = re.search("[|] *(https?://.*)", sMsg)
   283    286               if mURL:
   284    287                   sURL = mURL.group(1).strip()
   285    288                   sMsg = sMsg[:mURL.start(0)].strip()
   286    289               if sMsg[0:1] == "=":
   287    290                   sMsg = prepareFunction(sMsg[1:])
   288    291                   lFUNCTIONS.append(("_m_"+sIdAction, sMsg))
   289         -                for x in re.finditer("group[(](\d+)[)]", sMsg):
          292  +                for x in re.finditer(r"group[(](\d+)[)]", sMsg):
   290    293                       if int(x.group(1)) > nGroup:
   291    294                           print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
   292    295                   sMsg = "=_m_"+sIdAction
   293    296               else:
   294    297                   for x in re.finditer(r"\\(\d+)", sMsg):
   295    298                       if int(x.group(1)) > nGroup:
   296    299                           print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
................................................................................
   298    301                       print("# Error in message at line " + sIdAction + ":  This message looks like code. Line should begin with =")
   299    302   
   300    303       if sAction[0:1] == "=" or cAction == "=":
   301    304           if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
   302    305               print("# Error in action at line " + sIdAction + ": second argument for define must be a list of strings")
   303    306           sAction = prepareFunction(sAction)
   304    307           sAction = sAction.replace("m.group(i[4])", "m.group("+str(iGroup)+")")
   305         -        for x in re.finditer("group[(](\d+)[)]", sAction):
          308  +        for x in re.finditer(r"group[(](\d+)[)]", sAction):
   306    309               if int(x.group(1)) > nGroup:
   307    310                   print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
   308    311       else:
   309    312           for x in re.finditer(r"\\(\d+)", sAction):
   310    313               if int(x.group(1)) > nGroup:
   311    314                   print("# Error in groups in replacement at line " + sIdAction + " ("+str(nGroup)+" groups only)")
   312    315           if re.search("[.]\\w+[(]|sugg\\w+[(]", sAction):
................................................................................
   348    351           return [sCondition, cAction, ""]
   349    352       else:
   350    353           print("# Unknown action at line " + sIdAction)
   351    354           return None
   352    355   
   353    356   
   354    357   def _calcRulesStats (lRules):
          358  +    "count rules and actions"
   355    359       d = {'=':0, '~': 0, '-': 0, '>': 0}
   356    360       for aRule in lRules:
   357    361           if aRule[0] != "@@@@":
   358    362               for aAction in aRule[6]:
   359    363                   d[aAction[1]] = d[aAction[1]] + 1
   360    364       return (d, len(lRules))
   361    365   
   362    366   
   363    367   def displayStats (lParagraphRules, lSentenceRules):
          368  +    "display rules numbers"
   364    369       print("  {:>18} {:>18} {:>18} {:>18}".format("DISAMBIGUATOR", "TEXT PROCESSOR", "GRAMMAR CHECKING", "REGEX"))
   365    370       d, nRule = _calcRulesStats(lParagraphRules)
   366    371       print("§ {:>10} actions {:>10} actions {:>10} actions  in {:>8} rules".format(d['='], d['~'], d['-'], nRule))
   367    372       d, nRule = _calcRulesStats(lSentenceRules)
   368    373       print("s {:>10} actions {:>10} actions {:>10} actions  in {:>8} rules".format(d['='], d['~'], d['-'], nRule))
   369    374   
   370    375   
................................................................................
   399    404               m = re.match("OPTGROUP/([a-z0-9]+):(.+)$", sLine)
   400    405               lStructOpt.append( (m.group(1), list(map(str.split, m.group(2).split(",")))) )
   401    406           elif sLine.startswith("OPTSOFTWARE:"):
   402    407               lOpt = [ [s, {}]  for s in sLine[12:].strip().split() ]  # don’t use tuples (s, {}), because unknown to JS
   403    408           elif sLine.startswith("OPT/"):
   404    409               m = re.match("OPT/([a-z0-9]+):(.+)$", sLine)
   405    410               for i, sOpt in enumerate(m.group(2).split()):
   406         -                lOpt[i][1][m.group(1)] =  eval(sOpt)
          411  +                lOpt[i][1][m.group(1)] = eval(sOpt)
   407    412           elif sLine.startswith("OPTPRIORITY/"):
   408    413               m = re.match("OPTPRIORITY/([a-z0-9]+): *([0-9])$", sLine)
   409    414               dOptPriority[m.group(1)] = int(m.group(2))
   410    415           elif sLine.startswith("OPTLANG/"):
   411    416               m = re.match("OPTLANG/([a-z][a-z](?:_[A-Z][A-Z]|)):(.+)$", sLine)
   412    417               sLang = m.group(1)[:2]
   413    418               dOptLabel[sLang] = { "__optiontitle__": m.group(2).strip() }
................................................................................
   423    428       print("  options defined for: " + ", ".join([ t[0] for t in lOpt ]))
   424    429       dOptions = { "lStructOpt": lStructOpt, "dOptLabel": dOptLabel, "sDefaultUILang": sDefaultUILang }
   425    430       dOptions.update({ "dOpt"+k: v  for k, v in lOpt })
   426    431       return dOptions, dOptPriority
   427    432   
   428    433   
   429    434   def printBookmark (nLevel, sComment, nLine):
          435  +    "print bookmark within the rules file"
   430    436       print("  {:>6}:  {}".format(nLine, "  " * nLevel + sComment))
   431    437   
   432    438   
   433    439   def make (spLang, sLang, bJavaScript):
   434    440       "compile rules, returns a dictionary of values"
   435    441       # for clarity purpose, don’t create any file here
   436    442   
................................................................................
   569    575           sJSCallables += "    },\n"
   570    576       sJSCallables += "}\n"
   571    577   
   572    578       displayStats(lParagraphRules, lSentenceRules)
   573    579   
   574    580       print("Unnamed rules: " + str(nRULEWITHOUTNAME))
   575    581   
   576         -    d = { "callables": sPyCallables,
   577         -          "callablesJS": sJSCallables,
   578         -          "gctests": sGCTests,
   579         -          "gctestsJS": sGCTestsJS,
   580         -          "paragraph_rules": mergeRulesByOption(lParagraphRules),
   581         -          "sentence_rules": mergeRulesByOption(lSentenceRules),
   582         -          "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
   583         -          "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
   584         -    d.update(dOptions)
          582  +    dVars = {   "callables": sPyCallables,
          583  +                "callablesJS": sJSCallables,
          584  +                "gctests": sGCTests,
          585  +                "gctestsJS": sGCTestsJS,
          586  +                "paragraph_rules": mergeRulesByOption(lParagraphRules),
          587  +                "sentence_rules": mergeRulesByOption(lSentenceRules),
          588  +                "paragraph_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lParagraphRulesJS)),
          589  +                "sentence_rules_JS": jsconv.writeRulesToJSArray(mergeRulesByOption(lSentenceRulesJS)) }
          590  +    dVars.update(dOptions)
   585    591   
   586    592       # compile graph rules
   587         -    d2 = crg.make(lGraphRule, dDEF, sLang, bJavaScript)
   588         -    d.update(d2)
          593  +    dVars2 = crg.make(lGraphRule, dDEF, sLang, bJavaScript)
          594  +    dVars.update(dVars2)
   589    595   
   590         -    return d
          596  +    return dVars

Modified compile_rules_graph.py from [ef1f63d249] to [c4702b3e5a].

     1         -# Create a Direct Acyclic Rule Graph (DARG)
            1  +"""
            2  +Grammalecte: compile rules
            3  +Create a Direct Acyclic Rule Graphs (DARGs)
            4  +"""
     2      5   
     3      6   import re
     4      7   import traceback
     5      8   import json
     6      9   
     7     10   import darg
     8     11   
     9     12   
    10     13   dACTIONS = {}
    11     14   dFUNCTIONS = {}
    12     15   
    13     16   
    14     17   def prepareFunction (s, bTokenValue=False):
           18  +    "convert simple rule syntax to a string of Python code"
    15     19       s = s.replace("__also__", "bCondMemo")
    16     20       s = s.replace("__else__", "not bCondMemo")
    17     21       s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    18     22       s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    19     23       s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset], dTags', s)
    20     24       s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    21     25       s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                     # next token
................................................................................
    35     39       return s
    36     40   
    37     41   
    38     42   def genTokenLines (sTokenLine, dDef):
    39     43       "tokenize a string and return a list of lines of tokens"
    40     44       lToken = sTokenLine.split()
    41     45       lTokenLines = None
    42         -    for i, sToken in enumerate(lToken):
           46  +    for sToken in lToken:
    43     47           # optional token?
    44     48           bNullPossible = sToken.startswith("?") and sToken.endswith("¿")
    45     49           if bNullPossible:
    46     50               sToken = sToken[1:-1]
    47     51           # token with definition?
    48     52           if sToken.startswith("({") and sToken.endswith("})") and sToken[1:-1] in dDef:
    49     53               sToken = "(" + dDef[sToken[1:-1]] + ")"
................................................................................
    92     96                       for aRule in lTokenLines:
    93     97                           aRule.append(sToken)
    94     98       for aRule in lTokenLines:
    95     99           yield aRule
    96    100   
    97    101   
    98    102   def createRule (iLine, sRuleName, sTokenLine, iActionBlock, sActions, nPriority, dDef):
          103  +    "generator: create rule as list"
    99    104       # print(iLine, "//", sRuleName, "//", sTokenLine, "//", sActions, "//", nPriority)
   100    105       for lToken in genTokenLines(sTokenLine, dDef):
   101    106           # Calculate positions
   102    107           dPos = {}   # key: iGroup, value: iToken
   103    108           iGroup = 0
   104    109           for i, sToken in enumerate(lToken):
   105    110               if sToken.startswith("(") and sToken.endswith(")"):
................................................................................
   117    122                       dACTIONS[sActionId] = aAction
   118    123                       lResult = list(lToken)
   119    124                       lResult.extend(["##"+str(iLine), sActionId])
   120    125                       yield lResult
   121    126   
   122    127   
   123    128   def changeReferenceToken (sText, dPos):
          129  +    "change group reference in <sText> with values in <dPos>"
   124    130       for i in range(len(dPos), 0, -1):
   125    131           sText = sText.replace("\\"+str(i), "\\"+str(dPos[i]))
   126    132       return sText
   127    133   
   128    134   
   129    135   def checkTokenNumbers (sText, sActionId, nToken):
          136  +    "check if token references in <sText> greater than <nToken> (debugging)"
   130    137       for x in re.finditer(r"\\(\d+)", sText):
   131    138           if int(x.group(1)) > nToken:
   132    139               print("# Error in token index at line " + sActionId + " ("+str(nToken)+" tokens only)")
   133    140               print(sText)
   134    141   
   135    142   
   136    143   def checkIfThereIsCode (sText, sActionId):
          144  +    "check if there is code in <sText> (debugging)"
   137    145       if re.search("[.]\\w+[(]|sugg\\w+[(]|\\([0-9]|\\[[0-9]", sText):
   138    146           print("# Warning at line " + sActionId + ":  This message looks like code. Line should probably begin with =")
   139    147           print(sText)
   140    148   
   141    149   
   142    150   def createAction (sActionId, sAction, nPriority, nToken, dPos):
          151  +    "create action rule as a list"
   143    152       # Option
   144    153       sOption = False
   145    154       m = re.match("/(\\w+)/", sAction)
   146    155       if m:
   147    156           sOption = m.group(1)
   148    157           sAction = sAction[m.end():].strip()
   149    158       # valid action?
................................................................................
   365    374           print("\nActions:")
   366    375           for sActionName, aAction in dACTIONS.items():
   367    376               print(sActionName, aAction)
   368    377           print("\nFunctions:")
   369    378           print(sPyCallables)
   370    379   
   371    380       # Result
   372         -    d = {
          381  +    return {
   373    382           "graph_callables": sPyCallables,
   374    383           "rules_graphs": dAllGraph,
   375    384           "rules_actions": dACTIONS
   376    385       }
   377         -    return d

Modified compile_rules_js_convert.py from [f2cc9f3e39] to [9aa0239064].

     1         -# Convert Python code to JavaScript code
            1  +"""
            2  +Convert Python code and regexes to JavaScript code
            3  +"""
     2      4   
     3      5   import copy
     4      6   import re
     5      7   import json
     6      8   
     7      9   
     8     10   def py2js (sCode):
................................................................................
   114    116           sRegex = sRegex + "i"
   115    117       if not lNegLookBeforeRegex:
   116    118           lNegLookBeforeRegex = None
   117    119       return (sRegex, lNegLookBeforeRegex)
   118    120   
   119    121   
   120    122   def pyRuleToJS (lRule, dJSREGEXES, sWORDLIMITLEFT):
          123  +    "modify Python rules -> JS rules"
   121    124       lRuleJS = copy.deepcopy(lRule)
   122    125       # graph rules
   123    126       if lRuleJS[0] == "@@@@":
   124    127           return lRuleJS
   125    128       del lRule[-1] # tGroups positioning codes are useless for Python
   126    129       # error messages
   127    130       for aAction in lRuleJS[6]:
................................................................................
   131    134       # js regexes
   132    135       lRuleJS[1], lNegLookBehindRegex = regex2js(dJSREGEXES.get(lRuleJS[3], lRuleJS[1]), sWORDLIMITLEFT)
   133    136       lRuleJS.append(lNegLookBehindRegex)
   134    137       return lRuleJS
   135    138   
   136    139   
   137    140   def writeRulesToJSArray (lRules):
          141  +    "create rules as a string of arrays (to be bundled in a JSON string)"
   138    142       sArray = "[\n"
   139    143       for sOption, aRuleGroup in lRules:
   140    144           if sOption != "@@@@":
   141    145               sArray += '  ["' + sOption + '", [\n'  if sOption  else  "  [false, [\n"
   142    146               for sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, aGroups, aNegLookBehindRegex in aRuleGroup:
   143    147                   sArray += '    [' + sRegex + ", "
   144    148                   sArray += "true, " if bCaseInsensitive  else "false, "
................................................................................
   155    159                   sArray += '    ["' + sGraphName + '", "' + sLineId + '"],\n"'
   156    160               sArray += "  ]],\n"
   157    161       sArray += "]"
   158    162       return sArray
   159    163   
   160    164   
   161    165   def groupsPositioningCodeToList (sGroupsPositioningCode):
          166  +    "convert <sGroupsPositioningCode> to a list of codes (numbers or strings)"
   162    167       if not sGroupsPositioningCode:
   163    168           return None
   164    169       return [ int(sCode)  if sCode.isdigit() or (sCode[0:1] == "-" and sCode[1:].isdigit())  else sCode \
   165    170                for sCode in sGroupsPositioningCode.split(",") ]

Modified make.py from [b5066cbbba] to [47003996f5].

    30     30   
    31     31   
    32     32   def getConfig (sLang):
    33     33       "load config.ini in <sLang> at gc_lang/<sLang>, returns xConfigParser object"
    34     34       xConfig = configparser.SafeConfigParser()
    35     35       xConfig.optionxform = str
    36     36       try:
    37         -        xConfig.read_file(open("gc_lang/" + sLang + "/config.ii", "r", encoding="utf-8"))
           37  +        xConfig.read_file(open("gc_lang/" + sLang + "/config.ini", "r", encoding="utf-8"))
    38     38       except FileNotFoundError:
    39     39           print("# Error. Can’t read config file [" + sLang + "]")
    40     40           exit()
    41     41       return xConfig
    42     42   
    43     43   
    44     44   def createOptionsLabelProperties (dOptLbl):