Grammalecte  Check-in [cc997fd621]

Overview
Comment:[core] gc engine: code cleaning
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: cc997fd6212877f7fe3ec8834912a83519feb970420d5a8edffee7b6b9b7b59e
User & Date: olr on 2018-09-07 10:09:55
Other Links: branch diff | manifest | tags
Context
2018-09-07
21:29
[core] fix bug about context check-in: 1fbd95d6f4 user: olr tags: core, rg
10:09
[core] gc engine: code cleaning check-in: cc997fd621 user: olr tags: core, rg
2018-09-06
07:04
[build][bug] DARG: fix Heisenbug, sort actions identifiers by name (necessary for Python 3.5) check-in: e18bf06f0c user: olr tags: build, rg, warning
Changes

Modified gc_core/py/lang_core/gc_engine.py from [7169e30ca7] to [9f26b926c3].

67
68
69
70
71
72
73







74
75
76
77
78
79
80
..
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
...
136
137
138
139
140
141
142


143
144
145
146
147
148
149
...
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
...
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
...
337
338
339
340
341
342
343


344
345
346
347
348
349
350
...
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
...
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
        _oTokenizer = _oSpellChecker.getTokenizer()
        _oSpellChecker.activateStorage()
    except:
        traceback.print_exc()









def _getRules (bParagraph):
    try:
        if not bParagraph:
            return _rules.lSentenceRules
        return _rules.lParagraphRules
    except:
................................................................................
                try:
                    aRule[0] = re.compile(aRule[0])
                except:
                    echo("Bad regular expression in # " + str(aRule[2]))
                    aRule[0] = "(?i)<Grammalecte>"


#### Rules and options

def ignoreRule (sRuleId):
    "disable rule <sRuleId>"
    _aIgnoredRules.add(sRuleId)


def resetIgnoreRules ():
    "clear all ignored rules"
................................................................................

def displayRules (sFilter=None):
    "display the name of rules, with the filter <sFilter>"
    echo("List of rules. Filter: << " + str(sFilter) + " >>")
    for sOption, sLineId, sRuleId in listRegexRules(sFilter):
        echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId))




def setOption (sOpt, bVal):
    "set option <sOpt> with <bVal> if it exists"
    if sOpt in _dOptions:
        _dOptions[sOpt] = bVal


................................................................................


def resetOptions ():
    "set options to default values"
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))


def getSpellChecker ():
    "return the spellchecker object"
    return _oSpellChecker


#### Parsing

_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")

................................................................................
        #for nPos, dToken in self.dTokenPos.items():
        #    s += "{}\t{}\n".format(nPos, dToken)
        return s

    def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
        "analyses the paragraph sText and returns list of errors"
        #sText = unicodedata.normalize("NFC", sText)
        dOpt = _dOptions  if not dOptions  else dOptions
        bShowRuleId = option('idrule')

        # parse paragraph
        try:
            self.parseText(self.sText, self.sText0, True, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext)
        except:
            raise
................................................................................
    def update (self, sSentence, bDebug=False):
        "update <sSentence> and retokenize"
        self.sSentence = sSentence
        lNewToken = list(_oTokenizer.genTokens(sSentence, True))
        for dToken in lNewToken:
            if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
                dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]


        self.lToken = lNewToken
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
        if bDebug:
            echo("UPDATE:")
            echo(self)

    def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
................................................................................
                    bTokenFound = True
                elif "¬" in sMeta:
                    if dToken["sType"] not in sMeta:
                        if bDebug:
                            echo("  MATCH: *" + sMeta)
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] }
                        bTokenFound = True
        if "bKeep" in dPointer and not bTokenFound:
            yield dPointer
        # JUMP
        # Warning! Recurssion!
        if "<>" in dNode:
            dPointer2 = { "iNode1": iNode1, "dNode": dGraph[dNode["<>"]], "bKeep": True }
            yield from self._getNextPointers(dToken, dGraph, dPointer2, bDebug)

    def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        "parse graph with tokens from the text and execute actions encountered"
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bTagAndRewrite = False
        for iToken, dToken in enumerate(self.lToken):
            if bDebug:
                echo("TOKEN: " + dToken["sValue"])
            # check arcs for each existing pointer
            lNextPointer = []
................................................................................
            # check arcs of first nodes
            lPointer.extend(self._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug))
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                #if bDebug:
                #    echo("+", dPointer)
                if "<rules>" in dPointer["dNode"]:
                    bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iNode1"]-1, iToken, dOpt, sCountry, bShowRuleId, bDebug, bContext)
                    if bChange:
                        bTagAndRewrite = True
        if bTagAndRewrite:
            self.rewriteFromTags(bDebug)
        if bDebug:
            echo(self)
        return self.sSentence







>
>
>
>
>
>
>







 







<
<







 







>
>







 







<
<
<
<
<







 







|







 







>
>







 







|









<







 







|







67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
...
105
106
107
108
109
110
111


112
113
114
115
116
117
118
...
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
...
184
185
186
187
188
189
190





191
192
193
194
195
196
197
...
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
...
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
...
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480

481
482
483
484
485
486
487
...
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
        _sAppContext = sContext
        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
        _oTokenizer = _oSpellChecker.getTokenizer()
        _oSpellChecker.activateStorage()
    except:
        traceback.print_exc()


def getSpellChecker ():
    "return the spellchecker object"
    return _oSpellChecker


#### Rules

def _getRules (bParagraph):
    try:
        if not bParagraph:
            return _rules.lSentenceRules
        return _rules.lParagraphRules
    except:
................................................................................
                try:
                    aRule[0] = re.compile(aRule[0])
                except:
                    echo("Bad regular expression in # " + str(aRule[2]))
                    aRule[0] = "(?i)<Grammalecte>"




def ignoreRule (sRuleId):
    "disable rule <sRuleId>"
    _aIgnoredRules.add(sRuleId)


def resetIgnoreRules ():
    "clear all ignored rules"
................................................................................

def displayRules (sFilter=None):
    "display the name of rules, with the filter <sFilter>"
    echo("List of rules. Filter: << " + str(sFilter) + " >>")
    for sOption, sLineId, sRuleId in listRegexRules(sFilter):
        echo("{:<10} {:<10} {}".format(sOption, sLineId, sRuleId))


#### Options

def setOption (sOpt, bVal):
    "set option <sOpt> with <bVal> if it exists"
    if sOpt in _dOptions:
        _dOptions[sOpt] = bVal


................................................................................


def resetOptions ():
    "set options to default values"
    global _dOptions
    _dOptions = dict(gc_options.getOptions(_sAppContext))







#### Parsing

_zEndOfSentence = re.compile(r'([.?!:;…][ .?!… »”")]*|.$)')
_zBeginOfParagraph = re.compile(r"^\W*")
_zEndOfParagraph = re.compile(r"\W*$")

................................................................................
        #for nPos, dToken in self.dTokenPos.items():
        #    s += "{}\t{}\n".format(nPos, dToken)
        return s

    def parse (self, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
        "analyses the paragraph sText and returns list of errors"
        #sText = unicodedata.normalize("NFC", sText)
        dOpt = dOptions or _dOptions
        bShowRuleId = option('idrule')

        # parse paragraph
        try:
            self.parseText(self.sText, self.sText0, True, 0, sCountry, dOpt, bShowRuleId, bDebug, bContext)
        except:
            raise
................................................................................
    def update (self, sSentence, bDebug=False):
        "update <sSentence> and retokenize"
        self.sSentence = sSentence
        lNewToken = list(_oTokenizer.genTokens(sSentence, True))
        for dToken in lNewToken:
            if "lMorph" in self.dTokenPos.get(dToken["nStart"], {}):
                dToken["lMorph"] = self.dTokenPos[dToken["nStart"]]["lMorph"]
            if "tags" in self.dTokenPos.get(dToken["nStart"], {}):
                dToken["tags"] = self.dTokenPos[dToken["nStart"]]["tags"]
        self.lToken = lNewToken
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken  if dToken["sType"] != "INFO" }
        if bDebug:
            echo("UPDATE:")
            echo(self)

    def _getNextPointers (self, dToken, dGraph, dPointer, bDebug=False):
................................................................................
                    bTokenFound = True
                elif "¬" in sMeta:
                    if dToken["sType"] not in sMeta:
                        if bDebug:
                            echo("  MATCH: *" + sMeta)
                        yield { "iNode1": iNode1, "dNode": dGraph[dNode["<meta>"][sMeta]] }
                        bTokenFound = True
        if not bTokenFound and "bKeep" in dPointer:
            yield dPointer
        # JUMP
        # Warning! Recurssion!
        if "<>" in dNode:
            dPointer2 = { "iNode1": iNode1, "dNode": dGraph[dNode["<>"]], "bKeep": True }
            yield from self._getNextPointers(dToken, dGraph, dPointer2, bDebug)

    def parseGraph (self, dGraph, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        "parse graph with tokens from the text and execute actions encountered"

        lPointer = []
        bTagAndRewrite = False
        for iToken, dToken in enumerate(self.lToken):
            if bDebug:
                echo("TOKEN: " + dToken["sValue"])
            # check arcs for each existing pointer
            lNextPointer = []
................................................................................
            # check arcs of first nodes
            lPointer.extend(self._getNextPointers(dToken, dGraph, { "iNode1": iToken, "dNode": dGraph[0] }, bDebug))
            # check if there is rules to check for each pointer
            for dPointer in lPointer:
                #if bDebug:
                #    echo("+", dPointer)
                if "<rules>" in dPointer["dNode"]:
                    bChange = self._executeActions(dGraph, dPointer["dNode"]["<rules>"], dPointer["iNode1"]-1, iToken, dOptions, sCountry, bShowRuleId, bDebug, bContext)
                    if bChange:
                        bTagAndRewrite = True
        if bTagAndRewrite:
            self.rewriteFromTags(bDebug)
        if bDebug:
            echo(self)
        return self.sSentence