Grammalecte  Check-in [b5a5b6b161]

Overview
Comment:[core] gc engine: fix bug
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: b5a5b6b16120e61eb0c8c17406f85449179eb79596406da0279b02addbcf6063
User & Date: olr on 2018-07-23 17:54:08
Other Links: branch diff | manifest | tags
Context
2018-07-23
17:54
[fr] conversion: regex rules -> graph rules check-in: af307724f2 user: olr tags: fr, rg
17:54
[core] gc engine: fix bug check-in: b5a5b6b161 user: olr tags: core, rg
17:52
[build] rules condition rewriting update check-in: ee36aa096c user: olr tags: build, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [d2b4f44276] to [903e91d939].

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
...
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582

583
584
585
586
587
588
589
590
...
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
...
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
    if "‑" in sText:
        sText = sText.replace("‑", "-") # nobreakdash

    # parse sentences
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            try:
                oSentence = TokenSentence(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                _, dErrors = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
            except:
                raise
    return dErrors.values() # this is a view (iterable)


def _proofread (oSentence, s, sx, nOffset, bParagraph, dErrors, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
................................................................................
        return True
    dTokenPos[nPos]["lMorph"] = lMorph
    return True




#### TOKEN SENTENCE CHECKER

class TokenSentence:
    "Text parser"

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffsetWithinParagraph = nOffset
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
        self.dTags = {}
        self.dError = {}

    def __str__ (self):

        s = "sentence: " + self.sSentence0 + "\n"
        s += "now:      " + self.sSentence  + "\n"
        for dToken in self.lToken:
            s += f'{dToken["nStart"]}\t{dToken["nEnd"]}\t{dToken["sValue"]}'
            if "lMorph" in dToken:
                s += "\t" + str(dToken["lMorph"])
            s += "\n"
        for nPos, dToken in self.dTokenPos.items():
................................................................................
                                continue
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                                if bDebug:
                                    print("  MATCH: @" + sRegex)
                                yield dGraph[dNode["<re_morph>"][sRegex]]
        # token tags
        if "tags" in dToken and "<tags>" in dNode:
            for sTag in dNode["<tags>"]:
                if sTag in dToken["tags"]:
                    if bDebug:
                        print("  MATCH: /" + sTag)
                    yield dGraph[dNode["<tags>"][sTag]]
        # meta arc (for token type)
        if "<meta>" in dNode:
            for sMeta in dNode["<meta>"]:
                # not regex here, we just search if <dNode["sType"]> exists within <sMeta>
................................................................................
                                if bDebug:
                                    print("  COND_OK")
                                pass
                            elif cActionType == "/":
                                if bDebug:
                                    print("  SEMANTIC_TAG:\n  ", dRule[sRuleId])
                                nTokenStart = nTokenOffset + eAct[0]
                                nTokenEnd = nTokenOffset + eAct[1]
                                for i in range(nTokenStart, nTokenEnd+1):
                                    if "tags" in self.lToken[i]:
                                        self.lToken[i]["tags"].update(sWhat.split("|"))
                                    else:
                                        self.lToken[i]["tags"] = set(sWhat.split("|"))
                            elif cActionType == "%":
                                # sentence tags







|







 







|

|












>
|







 







|
|







 







|







139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
...
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
...
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
...
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
    if "‑" in sText:
        sText = sText.replace("‑", "-") # nobreakdash

    # parse sentences
    for iStart, iEnd in _getSentenceBoundaries(sText):
        if 4 < (iEnd - iStart) < 2000:
            try:
                oSentence = TextParser(sText[iStart:iEnd], sRealText[iStart:iEnd], iStart)
                _, dErrors = _proofread(oSentence, sText[iStart:iEnd], sRealText[iStart:iEnd], iStart, False, dErrors, dPriority, sCountry, dOpt, bShowRuleId, bDebug, bContext)
            except:
                raise
    return dErrors.values() # this is a view (iterable)


def _proofread (oSentence, s, sx, nOffset, bParagraph, dErrors, dPriority, sCountry, dOptions, bShowRuleId, bDebug, bContext):
................................................................................
        return True
    dTokenPos[nPos]["lMorph"] = lMorph
    return True




#### TEXT PARSER

class TextParser:
    "Text parser"

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffsetWithinParagraph = nOffset
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
        self.dTags = {}
        self.dError = {}

    def __str__ (self):
        s = "TEXT ==========\n"
        s += "sentence: " + self.sSentence0 + "\n"
        s += "now:      " + self.sSentence  + "\n"
        for dToken in self.lToken:
            s += f'{dToken["nStart"]}\t{dToken["nEnd"]}\t{dToken["sValue"]}'
            if "lMorph" in dToken:
                s += "\t" + str(dToken["lMorph"])
            s += "\n"
        for nPos, dToken in self.dTokenPos.items():
................................................................................
                                continue
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                                if bDebug:
                                    print("  MATCH: @" + sRegex)
                                yield dGraph[dNode["<re_morph>"][sRegex]]
        # token tags
        if "tags" in dToken and "<tags>" in dNode:
            for sTag in dToken["tags"]:
                if sTag in dNode["<tags>"]:
                    if bDebug:
                        print("  MATCH: /" + sTag)
                    yield dGraph[dNode["<tags>"][sTag]]
        # meta arc (for token type)
        if "<meta>" in dNode:
            for sMeta in dNode["<meta>"]:
                # not regex here, we just search if <dNode["sType"]> exists within <sMeta>
................................................................................
                                if bDebug:
                                    print("  COND_OK")
                                pass
                            elif cActionType == "/":
                                if bDebug:
                                    print("  SEMANTIC_TAG:\n  ", dRule[sRuleId])
                                nTokenStart = nTokenOffset + eAct[0]
                                nTokenEnd = nTokenOffset + (eAct[1]  if eAct[1]  else eAct[0])
                                for i in range(nTokenStart, nTokenEnd+1):
                                    if "tags" in self.lToken[i]:
                                        self.lToken[i]["tags"].update(sWhat.split("|"))
                                    else:
                                        self.lToken[i]["tags"] = set(sWhat.split("|"))
                            elif cActionType == "%":
                                # sentence tags