Grammalecte  Check-in [c55bec5997]

Overview
Comment: [core] better communication between graph rules and regex rules (still a mess for the transition)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: c55bec5997b5f628feda053fcc611fecafd498b4ac9b87934d575421e4c6c06d
User & Date: olr on 2018-07-09 13:49:47
Other Links: branch diff | manifest | tags
Context
2018-07-09
15:01
[fr] conversion: regex rules -> graph rules check-in: d05dab5b5a user: olr tags: fr, rg
13:49
[core] better communication between graph rules and regex rules (still a mess for the transition) check-in: c55bec5997 user: olr tags: core, rg
13:48
[fr] micro-corrections check-in: 2c4096dba5 user: olr tags: fr, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [e53fccaf02] to [8ce717d080].

   111    111           yield (iStart, m.end())
   112    112           iStart = m.end()
   113    113   
   114    114   
   115    115   def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
   116    116       "analyses the paragraph sText and returns list of errors"
   117    117       #sText = unicodedata.normalize("NFC", sText)
   118         -    aErrors = None
          118  +    dErrors = {}
   119    119       sRealText = sText
   120    120       dPriority = {}  # Key = position; value = priority
   121    121       dOpt = _dOptions  if not dOptions  else dOptions
   122    122       bShowRuleId = option('idrule')
   123    123   
   124    124       # parse paragraph
   125    125       try:
   126         -        sNew, aErrors = _proofread(None, sText, sRealText, 0, True, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
          126  +        sNew, dErrors = _proofread(None, sText, sRealText, 0, True, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
   127    127           if sNew:
   128    128               sText = sNew
   129    129       except:
   130    130           raise
   131    131   
   132    132       # cleanup
   133    133       if " " in sText:
................................................................................
   140    140           sText = sText.replace("‑", "-") # nobreakdash
   141    141   
   142    142       # parse sentences
   143    143       for iStart, iEnd in _getSentenceBoundaries(sText):
   144    144           if 4 < (iEnd - iStart) < 2000:
   145    145               try:
   146    146                   oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
   147         -                _, errs = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
   148         -                aErrors.update(errs)
          147  +                _, dErrors = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
   149    148               except:
   150    149                   raise
   151         -    return aErrors.values() # this is a view (iterable)
          150  +    return dErrors.values() # this is a view (iterable)
   152    151   
   153    152   
   154         -def _proofread (oSentence, s, sx, nOffset, bParagraph, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
   155         -    dErrs = {}
          153  +def _proofread (oSentence, s, sx, nOffset, bParagraph, dErrors, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
   156    154       bParagraphChange = False
   157    155       bSentenceChange = False
   158    156       dTokenPos = oSentence.dTokenPos if oSentence else {}
   159    157       for sOption, lRuleGroup in _getRules(bParagraph):
   160    158           if sOption == "@@@@":
   161    159               # graph rules
          160  +            oSentence.dError = dErrors
   162    161               if not bParagraph and bSentenceChange:
   163    162                   oSentence.update(s, bDebug)
   164    163                   bSentenceChange = False
   165    164               for sGraphName, sLineId in lRuleGroup:
   166    165                   if bDebug:
   167    166                       print("\n>>>> GRAPH:", sGraphName, sLineId)
   168    167                   bParagraphChange, s = oSentence.parse(dAllGraph[sGraphName], dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext)
   169         -                dErrs.update(oSentence.dError)
          168  +                dErrors.update(oSentence.dError)
   170    169                   dTokenPos = oSentence.dTokenPos
   171    170           elif not sOption or dOptions.get(sOption, False):
   172    171               # regex rules
   173    172               for zRegex, bUppercase, sLineId, sRuleId, nPriority, lActions in lRuleGroup:
   174    173                   if sRuleId not in _aIgnoredRules:
   175    174                       for m in zRegex.finditer(s):
   176    175                           bCondMemo = None
................................................................................
   180    179                                   bCondMemo = not sFuncCond or globals()[sFuncCond](s, sx, m, dTokenPos, sCountry, bCondMemo)
   181    180                                   if bCondMemo:
   182    181                                       if bDebug:
   183    182                                           print("RULE:", sLineId)
   184    183                                       if cActionType == "-":
   185    184                                           # grammar error
   186    185                                           nErrorStart = nOffset + m.start(eAct[0])
   187         -                                        if nErrorStart not in dErrs or nPriority > dPriority.get(nErrorStart, -1):
   188         -                                            dErrs[nErrorStart] = _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext)
          186  +                                        if nErrorStart not in dErrors or nPriority > dPriority.get(nErrorStart, -1):
          187  +                                            dErrors[nErrorStart] = _createError(s, sx, sWhat, nOffset, m, eAct[0], sLineId, sRuleId, bUppercase, eAct[1], eAct[2], bShowRuleId, sOption, bContext)
   189    188                                               dPriority[nErrorStart] = nPriority
   190    189                                       elif cActionType == "~":
   191    190                                           # text processor
   192    191                                           s = _rewrite(s, sWhat, eAct[0], m, bUppercase)
   193    192                                           bParagraphChange = True
   194    193                                           bSentenceChange = True
   195    194                                           if bDebug:
................................................................................
   206    205                                       else:
   207    206                                           echo("# error: unknown action at " + sLineId)
   208    207                                   elif cActionType == ">":
   209    208                                       break
   210    209                               except Exception as e:
   211    210                                   raise Exception(str(e), "# " + sLineId + " # " + sRuleId)
   212    211       if bParagraphChange:
   213         -        return (s, dErrs)
   214         -    return (False, dErrs)
          212  +        return (s, dErrors)
          213  +    return (False, dErrors)
   215    214   
   216    215   
   217    216   def _createError (s, sx, sRepl, nOffset, m, iGroup, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
   218    217       nStart = nOffset + m.start(iGroup)
   219    218       nEnd = nOffset + m.end(iGroup)
   220    219       # suggestions
   221    220       if sRepl[0:1] == "=":
................................................................................
   699    698                   elif dToken["sType"] in sMeta:
   700    699                       if bDebug:
   701    700                           print("  MATCH: *" + sMeta)
   702    701                       yield dGraph[dNode["<meta>"][sMeta]]
   703    702   
   704    703       def parse (self, dGraph, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
   705    704           "parse tokens from the text and execute actions encountered"
   706         -        self.dError = {}
   707         -        dPriority = {}  # Key = position; value = priority
   708    705           dOpt = _dOptions  if not dOptions  else dOptions
   709    706           lPointer = []
   710    707           bTagAndRewrite = False
   711    708           for dToken in self.lToken:
   712    709               if bDebug:
   713    710                   print("TOKEN:", dToken["sValue"])
   714    711               # check arcs for each existing pointer
................................................................................
   721    718               for dNode in self._getNextMatchingNodes(dToken, dGraph, dGraph[0], bDebug):
   722    719                   lPointer.append({"iToken": dToken["i"], "dNode": dNode})
   723    720               # check if there is rules to check for each pointer
   724    721               for dPointer in lPointer:
   725    722                   #if bDebug:
   726    723                   #    print("+", dPointer)
   727    724                   if "<rules>" in dPointer["dNode"]:
   728         -                    bChange, dErr = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dToken["i"], dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
   729         -                    self.dError.update(dErr)
          725  +                    bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iToken"]-1, dToken["i"], dPriority, dOpt, sCountry, bShowRuleId, bDebug, bContext)
   730    726                       if bChange:
   731    727                           bTagAndRewrite = True
   732    728           if bTagAndRewrite:
   733    729               self.rewrite(bDebug)
   734    730           if bDebug:
   735    731               print(self)
   736    732           return (bTagAndRewrite, self.sSentence)
   737    733   
   738    734       def _executeActions (self, dGraph, dNode, nTokenOffset, nLastToken, dPriority, dOptions, sCountry, bShowRuleId, bDebug, bContext):
   739    735           "execute actions found in the DARG"
   740         -        dError = {}
   741    736           bChange = False
   742    737           for sLineId, nextNodeKey in dNode.items():
   743    738               bCondMemo = None
   744    739               for sRuleId in dGraph[nextNodeKey]:
   745    740                   try:
   746    741                       if bDebug:
   747    742                           print("  TRY:", sRuleId)
................................................................................
   757    752                               if cActionType == "-":
   758    753                                   # grammar error
   759    754                                   nTokenErrorStart = nTokenOffset + eAct[0]
   760    755                                   if "bImmune" not in self.lToken[nTokenErrorStart]:
   761    756                                       nTokenErrorEnd = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
   762    757                                       nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
   763    758                                       nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
   764         -                                    if nErrorStart not in dError or eAct[2] > dPriority.get(nErrorStart, -1):
   765         -                                        dError[nErrorStart] = self._createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
          759  +                                    if nErrorStart not in self.dError or eAct[2] > dPriority.get(nErrorStart, -1):
          760  +                                        self.dError[nErrorStart] = self._createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
   766    761                                           dPriority[nErrorStart] = eAct[2]
   767    762                                           if bDebug:
   768         -                                            print("  NEW_ERROR:", dError[nErrorStart], "\n  ", dRule[sRuleId])
          763  +                                            print("  NEW_ERROR:", self.dError[nErrorStart], "\n  ", dRule[sRuleId])
   769    764                               elif cActionType == "~":
   770    765                                   # text processor
   771    766                                   if bDebug:
   772    767                                       print("  TAG_PREPARE:\n  ", dRule[sRuleId])
   773    768                                   nEndToken = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
   774    769                                   self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, nTokenOffset, True, bDebug)
   775    770                                   bChange = True
................................................................................
   796    791                                   print("# error: unknown action at " + sLineId)
   797    792                           elif cActionType == ">":
   798    793                               if bDebug:
   799    794                                   print("  COND_BREAK")
   800    795                               break
   801    796                   except Exception as e:
   802    797                       raise Exception(str(e), sLineId, sRuleId, self.sSentence)
   803         -        return bChange, dError
          798  +        return bChange
   804    799   
   805    800       def _createError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
   806    801           # suggestions
   807    802           if sSugg[0:1] == "=":
   808    803               sSugg = globals()[sSugg[1:]](self.lToken, nTokenOffset)
   809    804               lSugg = sSugg.split("|")  if sSugg  else []
   810    805           elif sSugg == "_":