Grammalecte  Check-in [a835df567d]

Overview
Comment: [build] rename global vars
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | build
Files: files | file ages | folders
SHA3-256: a835df567d36bd6d8260ae36db48b7e032aa0efa7128ba797df7fdb6bb0878b1
User & Date: olr on 2017-05-16 23:46:18
Other Links: manifest | tags
Context
2017-05-16
23:52
[build] count unnamed rules check-in: e6a7cd50a7 user: olr tags: build, trunk
23:46
[build] rename global vars check-in: a835df567d user: olr tags: build, trunk
22:30
[fr] tabulations check-in: b650eee6b7 user: olr tags: fr, trunk
Changes

Modified compile_rules.py from [e2df43feb0] to [e1c06fc5a9].

     3      3   import sys
     4      4   import traceback
     5      5   import copy
     6      6   import json
     7      7   from distutils import file_util
     8      8   
     9      9   
    10         -DEF = {}
    11         -FUNCTIONS = []
           10  +dDEF = {}
           11  +lFUNCTIONS = []
    12     12   
    13         -RULESET = set()     # set of rule-ids to check if there is several rules with the same id
           13  +aRULESET = set()     # set of rule-ids to check if there is several rules with the same id
    14     14   
    15         -JSREGEXES = {}
           15  +dJSREGEXES = {}
    16     16   
    17         -WORDLIMITLEFT  = r"(?<![\w.,–-])"   # r"(?<![-.,—])\b"  seems slower
    18         -WORDLIMITRIGHT = r"(?![\w–-])"      # r"\b(?!-—)"       seems slower
           17  +sWORDLIMITLEFT  = r"(?<![\w.,–-])"   # r"(?<![-.,—])\b"  seems slower
           18  +sWORDLIMITRIGHT = r"(?![\w–-])"      # r"\b(?!-—)"       seems slower
    19     19   
    20     20   
    21     21   def prepareFunction (s):
    22     22       s = s.replace("__also__", "bCondMemo")
    23     23       s = s.replace("__else__", "not bCondMemo")
    24     24       s = re.sub(r"(select|exclude)[(][\\](\d+)", '\\1(dDA, m.start(\\2), m.group(\\2)', s)
    25     25       s = re.sub(r"define[(][\\](\d+)", 'define(dDA, m.start(\\1)', s)
................................................................................
   160    160           traceback.print_exc()
   161    161           print(sRegex)
   162    162       return 0
   163    163   
   164    164   
   165    165   def createRule (s, nIdLine, sLang, bParagraph, dOptPriority):
   166    166       "returns rule as list [option name, regex, bCaseInsensitive, identifier, list of actions]"
   167         -    global JSREGEXES
          167  +    global dJSREGEXES
   168    168   
   169    169       #### OPTIONS
   170    170       sLineId = str(nIdLine) + ("p" if bParagraph else "s")
   171    171       sRuleId = sLineId
   172    172       sOption = False         # False or [a-z0-9]+ name
   173    173       nPriority = 4           # Default is 4, value must be between 0 and 9
   174    174       tGroups = None          # code for groups positioning (only useful for JavaScript)
................................................................................
   179    179       if m:
   180    180           cWordLimitLeft = m.group('borders_and_case')[0]
   181    181           cCaseMode = m.group('borders_and_case')[1]
   182    182           cWordLimitRight = m.group('borders_and_case')[2]
   183    183           sOption = m.group('option')[1:]  if m.group('option')  else False
   184    184           if m.group('ruleid'):
   185    185               sRuleId =  m.group('ruleid')[1:-1]
   186         -            if sRuleId in RULESET:
          186  +            if sRuleId in aRULESET:
   187    187                   print("# Error. Several rules have the same id: " + sRuleId)
   188    188                   exit()
   189         -            RULESET.add(sRuleId)
          189  +            aRULESET.add(sRuleId)
   190    190           nPriority = dOptPriority.get(sOption, 4)
   191    191           if m.group('priority'):
   192    192               nPriority = int(m.group('priority')[1:])
   193    193           s = s[m.end(0):]
   194    194       else:
   195    195           print("# Warning. No option defined at line: " + sLineId)
   196    196   
................................................................................
   206    206       m = re.search("@@\\S+", sRegex)
   207    207       if m:
   208    208           tGroups = groupsPositioningCodeToList(sRegex[m.start()+2:])
   209    209           sRegex = sRegex[:m.start()].strip()
   210    210       # JS regex
   211    211       m = re.search("<js>.+</js>i?", sRegex)
   212    212       if m:
   213         -        JSREGEXES[sLineId] = m.group(0)
          213  +        dJSREGEXES[sLineId] = m.group(0)
   214    214           sRegex = sRegex[:m.start()].strip()
   215    215       if "<js>" in sRegex or "</js>" in sRegex:
   216    216           print("# Error: JavaScript regex not delimited at line " + sLineId)
   217    217           return None
   218    218   
   219    219       # quotes ?
   220    220       if sRegex.startswith('"') and sRegex.endswith('"'):
   221    221           sRegex = sRegex[1:-1]
   222    222   
   223    223       ## definitions
   224         -    for sDef, sRepl in DEF.items():
          224  +    for sDef, sRepl in dDEF.items():
   225    225           sRegex = sRegex.replace(sDef, sRepl)
   226    226   
   227    227       ## count number of groups (must be done before modifying the regex)
   228    228       nGroup = countGroupInRegex(sRegex)
   229    229       if nGroup > 0:
   230    230           if not tGroups:
   231    231               print("# Warning: groups positioning code for JavaScript should be defined at line " + sLineId)
   232    232           else:
   233    233               if nGroup != len(tGroups):
   234    234                   print("# Error: groups positioning code irrelevant at line " + sLineId)
   235    235   
   236    236       ## word limit
   237    237       if cWordLimitLeft == '[' and not sRegex.startswith(("^", '’', "'", ",")):
   238         -        sRegex = WORDLIMITLEFT + sRegex
          238  +        sRegex = sWORDLIMITLEFT + sRegex
   239    239       if cWordLimitRight == ']' and not sRegex.endswith(("$", '’', "'", ",")):
   240         -        sRegex = sRegex + WORDLIMITRIGHT
          240  +        sRegex = sRegex + sWORDLIMITRIGHT
   241    241   
   242    242       ## casing mode
   243    243       if cCaseMode == "i":
   244    244           bCaseInsensitive = True
   245    245           if not sRegex.startswith("(?i)"):
   246    246               sRegex = "(?i)" + sRegex
   247    247       elif cCaseMode == "s":
................................................................................
   278    278           return None
   279    279   
   280    280       return [sOption, sRegex, bCaseInsensitive, sLineId, sRuleId, nPriority, lActions, tGroups]
   281    281   
   282    282   
   283    283   def createAction (sIdAction, sAction, nGroup):
   284    284       "returns an action to perform as a tuple (condition, action type, action[, iGroup [, message, URL ]])"
   285         -    global FUNCTIONS
          285  +    global lFUNCTIONS
   286    286   
   287    287       m = re.search(r"([-~=>])(\d*|)>>", sAction)
   288    288       if not m:
   289    289           print("# No action at line " + sIdAction)
   290    290           return None
   291    291   
   292    292       #### CONDITION
   293    293       sCondition = sAction[:m.start()].strip()
   294    294       if sCondition:
   295    295           sCondition = prepareFunction(sCondition)
   296         -        FUNCTIONS.append(("c_"+sIdAction, sCondition))
          296  +        lFUNCTIONS.append(("c_"+sIdAction, sCondition))
   297    297           for x in re.finditer("[.](?:group|start|end)[(](\d+)[)]", sCondition):
   298    298               if int(x.group(1)) > nGroup:
   299    299                   print("# Error in groups in condition at line " + sIdAction + " ("+str(nGroup)+" groups only)")
   300    300           if ".match" in sCondition:
   301    301               print("# Error. JS compatibility. Don't use .match() in condition, use .search()")
   302    302           sCondition = "c_"+sIdAction
   303    303       else:
................................................................................
   319    319           sURL = ""
   320    320           mURL = re.search("[|] *(https?://.*)", sMsg)
   321    321           if mURL:
   322    322               sURL = mURL.group(1).strip()
   323    323               sMsg = sMsg[:mURL.start(0)].strip()
   324    324           if sMsg[0:1] == "=":
   325    325               sMsg = prepareFunction(sMsg[1:])
   326         -            FUNCTIONS.append(("m_"+sIdAction, sMsg))
          326  +            lFUNCTIONS.append(("m_"+sIdAction, sMsg))
   327    327               for x in re.finditer("group[(](\d+)[)]", sMsg):
   328    328                   if int(x.group(1)) > nGroup:
   329    329                       print("# Error in groups in message at line " + sIdAction + " ("+str(nGroup)+" groups only)")
   330    330               sMsg = "=m_"+sIdAction
   331    331           else:
   332    332               for x in re.finditer(r"\\(\d+)", sMsg):
   333    333                   if int(x.group(1)) > nGroup:
................................................................................
   351    351               print("# Error in action at line " + sIdAction + ":  This action looks like code. Line should begin with =")
   352    352   
   353    353       if cAction == "-":
   354    354           ## error detected --> suggestion
   355    355           if not sAction:
   356    356               print("# Error in action at line " + sIdAction + ":  This action is empty.")
   357    357           if sAction[0:1] == "=":
   358         -            FUNCTIONS.append(("s_"+sIdAction, sAction[1:]))
          358  +            lFUNCTIONS.append(("s_"+sIdAction, sAction[1:]))
   359    359               sAction = "=s_"+sIdAction
   360    360           elif sAction.startswith('"') and sAction.endswith('"'):
   361    361               sAction = sAction[1:-1]
   362    362           if not sMsg:
   363    363               print("# Error in action at line " + sIdAction + ":  the message is empty.")
   364    364           return [sCondition, cAction, sAction, iGroup, sMsg, sURL]
   365    365       elif cAction == "~":
   366    366           ## text processor
   367    367           if not sAction:
   368    368               print("# Error in action at line " + sIdAction + ":  This action is empty.")
   369    369           if sAction[0:1] == "=":
   370         -            FUNCTIONS.append(("p_"+sIdAction, sAction[1:]))
          370  +            lFUNCTIONS.append(("p_"+sIdAction, sAction[1:]))
   371    371               sAction = "=p_"+sIdAction
   372    372           elif sAction.startswith('"') and sAction.endswith('"'):
   373    373               sAction = sAction[1:-1]
   374    374           return [sCondition, cAction, sAction, iGroup]
   375    375       elif cAction == "=":
   376    376           ## disambiguator
   377    377           if sAction[0:1] == "=":
   378    378               sAction = sAction[1:]
   379    379           if not sAction:
   380    380               print("# Error in action at line " + sIdAction + ":  This action is empty.")
   381         -        FUNCTIONS.append(("d_"+sIdAction, sAction))
          381  +        lFUNCTIONS.append(("d_"+sIdAction, sAction))
   382    382           sAction = "d_"+sIdAction
   383    383           return [sCondition, cAction, sAction]
   384    384       elif cAction == ">":
   385    385           ## no action, break loop if condition is False
   386    386           return [sCondition, cAction, ""]
   387    387       else:
   388    388           print("# Unknown action at line " + sIdAction)
................................................................................
   402    402       #   Ā-ʯ     0100-02AF   (mixed)
   403    403       #   -> a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ
   404    404       bCaseInsensitive = False
   405    405       if "(?i)" in sRegex:
   406    406           sRegex = sRegex.replace("(?i)", "")
   407    407           bCaseInsensitive = True
   408    408       lNegLookBeforeRegex = []
   409         -    if WORDLIMITLEFT in sRegex:
   410         -        sRegex = sRegex.replace(WORDLIMITLEFT, "")
          409  +    if sWORDLIMITLEFT in sRegex:
          410  +        sRegex = sRegex.replace(sWORDLIMITLEFT, "")
   411    411           lNegLookBeforeRegex = ["[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ.,–-]$"]
   412    412       sRegex = sRegex.replace("[\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ")
   413    413       sRegex = sRegex.replace("\\w", "[a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯ]")
   414    414       sRegex = sRegex.replace("[.]", r"\.")
   415    415       if not sRegex.startswith("<js>"):
   416    416           sRegex = sRegex.replace("/", r"\/")
   417    417       m = re.search(r"\(\?<!([^()]+)\)", sRegex)  # Negative lookbefore assertion should always be at the beginning of regex
................................................................................
   436    436       lRuleJS = copy.deepcopy(lRule)
   437    437       del lRule[-1] # tGroups positioning codes are useless for Python
   438    438       # error messages
   439    439       for aAction in lRuleJS[6]:
   440    440           if aAction[1] == "-":
   441    441               aAction[4] = aAction[4].replace("« ", "«&nbsp;").replace(" »", "&nbsp;»")
   442    442       # js regexes
   443         -    lRuleJS[1], lNegLookBehindRegex = regex2js( JSREGEXES.get(lRuleJS[3], lRuleJS[1]) )
          443  +    lRuleJS[1], lNegLookBehindRegex = regex2js( dJSREGEXES.get(lRuleJS[3], lRuleJS[1]) )
   444    444       lRuleJS.append(lNegLookBehindRegex)
   445    445       return lRuleJS
   446    446   
   447    447   
   448    448   def writeRulesToJSArray (lRules):
   449    449       sArray = "[\n"
   450    450       for sOption, aRuleGroup in lRules:
................................................................................
   542    542   
   543    543   def make (lRules, sLang, bJavaScript):
   544    544       "compile rules, returns a dictionary of values"
   545    545       # for clarity purpose, don’t create any file here
   546    546   
   547    547       # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
   548    548       print("  parsing rules...")
   549         -    global DEF
          549  +    global dDEF
   550    550       lLine = []
   551    551       lRuleLine = []
   552    552       lTest = []
   553    553       lOpt = []
   554    554       for i, sLine in enumerate(lRules, 1):
   555    555           if sLine.startswith('#END'):
   556    556               break
   557    557           elif sLine.startswith("#"):
   558    558               pass
   559    559           elif sLine.startswith("DEF:"):
   560    560               m = re.match("DEF: +([a-zA-Z_][a-zA-Z_0-9]*) +(.+)$", sLine.strip())
   561    561               if m:
   562         -                DEF["{"+m.group(1)+"}"] = m.group(2)
          562  +                dDEF["{"+m.group(1)+"}"] = m.group(2)
   563    563               else:
   564    564                   print("Error in definition: ", end="")
   565    565                   print(sLine.strip())
   566    566           elif sLine.startswith("TEST:"):
   567    567               lTest.append("{:<8}".format(i) + "  " + sLine[5:].strip())
   568    568           elif sLine.startswith("TODO:"):
   569    569               pass
................................................................................
   611    611                           lSentenceRules.append(aRule)
   612    612                           lSentenceRulesJS.append(pyRuleToJS(aRule))
   613    613   
   614    614       # creating file with all functions callable by rules
   615    615       print("  creating callables...")
   616    616       sPyCallables = "# generated code, do not edit\n"
   617    617       sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
   618         -    for sFuncName, sReturn in FUNCTIONS:
          618  +    for sFuncName, sReturn in lFUNCTIONS:
   619    619           cType = sFuncName[0:1]
   620    620           if cType == "c": # condition
   621    621               sParams = "s, sx, m, dDA, sCountry, bCondMemo"
   622    622           elif cType == "m": # message
   623    623               sParams = "s, m"
   624    624           elif cType == "s": # suggestion
   625    625               sParams = "s, m"