Grammalecte  Check-in [29461e1888]

Overview
Comment:[build][core] small code cleaning
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: 29461e18885efb2f9596972c5fc3b11e3273bff17146495d020065843f0ab967
User & Date: olr on 2018-05-21 13:08:48
Other Links: branch diff | manifest | tags
Context
2018-05-23
08:37
[graphspell][py] data memorization check-in: 3f84923104 user: olr tags: graphspell, rg
2018-05-21
13:08
[build][core] small code cleaning check-in: 29461e1888 user: olr tags: build, core, rg
2018-05-20
10:07
[build][core] graph generation update check-in: 0f6ac8c5a7 user: olr tags: build, core, rg
Changes

Modified darg.py from [06ce413e8b] to [bf378d22b5].

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
..
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
..
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119


class DARG:
    """DIRECT ACYCLIC RULE GRAPH"""
    # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)

    def __init__ (self, lRule, sLangCode):
        print("===== Direct Acyclic Token Graph - Minimal Acyclic Finite State Automaton =====")

        # Preparing DARG
        print(" > Preparing list of tokens")
        self.sLangCode = sLangCode
        self.nRule = len(lRule)
        self.aPreviousRule = []
        Node.resetNextId()
................................................................................
        # add the suffix, starting from the correct node mid-way through the graph
        if len(self.lUncheckedNodes) == 0:
            oNode = self.oRoot
        else:
            oNode = self.lUncheckedNodes[-1][2]

        iToken = nCommonPrefix
        for token in aRule[nCommonPrefix:]:
            oNextNode = Node()
            oNode.dArcs[token] = oNextNode
            self.lUncheckedNodes.append((oNode, token, oNextNode))
            if iToken == (len(aRule) - 2): 
                oNode.bFinal = True
            iToken += 1
            oNode = oNextNode
        oNode.bFinal = True
        self.aPreviousRule = aRule

................................................................................
    def finish (self):
        "minimize unchecked nodes"
        self._minimize(0)

    def _minimize (self, downTo):
        # proceed from the leaf up to a certain point
        for i in range( len(self.lUncheckedNodes)-1, downTo-1, -1 ):
            oNode, token, oChildNode = self.lUncheckedNodes[i]
            if oChildNode in self.lMinimizedNodes:
                # replace the child with the previously encountered one
                oNode.dArcs[token] = self.lMinimizedNodes[oChildNode]
            else:
                # add the state to the minimized nodes.
                self.lMinimizedNodes[oChildNode] = oChildNode
            self.lUncheckedNodes.pop()

    def countNodes (self):
        self.nNode = len(self.lMinimizedNodes)

    def countArcs (self):
        self.nArc = 0
        for oNode in self.lMinimizedNodes:
            self.nArc += len(oNode.dArcs)
        
    def lookup (self, sWord):
        oNode = self.oRoot
        for c in sWord:
            if c not in oNode.dArcs:
                return False
            oNode = oNode.dArcs[c]
        return oNode.bFinal

    def displayInfo (self):
        print(" * {:<12} {:>16,}".format("Rules:", self.nRule))
        print(" * {:<12} {:>16,}".format("Nodes:", self.nNode))
        print(" * {:<12} {:>16,}".format("Arcs:", self.nArc))

    def createGraph (self):







|







 







|

|
|







 







|


|












<
<
<
<
<
<
<
<







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
..
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
..
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104








105
106
107
108
109
110
111


class DARG:
    """DIRECT ACYCLIC RULE GRAPH"""
    # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)

    def __init__ (self, lRule, sLangCode):
        print("===== Direct Acyclic Rule Graph - Minimal Acyclic Finite State Automaton =====")

        # Preparing DARG
        print(" > Preparing list of tokens")
        self.sLangCode = sLangCode
        self.nRule = len(lRule)
        self.aPreviousRule = []
        Node.resetNextId()
................................................................................
        # add the suffix, starting from the correct node mid-way through the graph
        if len(self.lUncheckedNodes) == 0:
            oNode = self.oRoot
        else:
            oNode = self.lUncheckedNodes[-1][2]

        iToken = nCommonPrefix
        for sToken in aRule[nCommonPrefix:]:
            oNextNode = Node()
            oNode.dArcs[sToken] = oNextNode
            self.lUncheckedNodes.append((oNode, sToken, oNextNode))
            if iToken == (len(aRule) - 2): 
                oNode.bFinal = True
            iToken += 1
            oNode = oNextNode
        oNode.bFinal = True
        self.aPreviousRule = aRule

................................................................................
    def finish (self):
        "minimize unchecked nodes"
        self._minimize(0)

    def _minimize (self, downTo):
        # proceed from the leaf up to a certain point
        for i in range( len(self.lUncheckedNodes)-1, downTo-1, -1 ):
            oNode, sToken, oChildNode = self.lUncheckedNodes[i]
            if oChildNode in self.lMinimizedNodes:
                # replace the child with the previously encountered one
                oNode.dArcs[sToken] = self.lMinimizedNodes[oChildNode]
            else:
                # add the state to the minimized nodes.
                self.lMinimizedNodes[oChildNode] = oChildNode
            self.lUncheckedNodes.pop()

    def countNodes (self):
        self.nNode = len(self.lMinimizedNodes)

    def countArcs (self):
        self.nArc = 0
        for oNode in self.lMinimizedNodes:
            self.nArc += len(oNode.dArcs)









    def displayInfo (self):
        print(" * {:<12} {:>16,}".format("Rules:", self.nRule))
        print(" * {:<12} {:>16,}".format("Nodes:", self.nNode))
        print(" * {:<12} {:>16,}".format("Arcs:", self.nArc))

    def createGraph (self):

Modified gc_core/py/lang_core/gc_sentence.py from [5e66d88647] to [90cbca3aed].

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
..
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68




69
70
71
72
73
74
75
76
77
78
79
..
83
84
85
86
87
88
89
















































    def parse (self):
        dErr = {}
        lPointer = []
        for dToken in self.lToken:
            for i, dPointer in enumerate(lPointer):
                bValid = False
                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                    dPointer["nOffset"] += 1
                    dPointer["dNode"] = dNode
                    bValid = True
                if not bValid:
                    del lPointer[i]
            for dNode in self._getNextMatchingNodes(dToken, dGraph):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            for dPointer in lPointer:
................................................................................
    def _executeActions (self, dNode):
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](s, sx, m, dDA, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            nErrorStart = nOffset + m.start(eAct[0])
                            nErrorEnd = nOffset + m.start(eAct[1])
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self.lToken, self.sSentence0, sWhat, nOffset, m, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            self.lToken = _rewrite(self.lToken, sWhat, bUppercase)




                            bChange = True
                        elif cActionType == "=":
                            # disambiguation
                            globals()[sWhat](self.lToken, dDA)
                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions
                            pass
                        else:
                            echo("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
................................................................................
    def _createWriterError (self):
        d = {}
        return d

    def _createDictError (self):
        d = {}
        return d























































|







 







|



|
|

|



|
>
>
>
>



|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
..
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
..
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
    def parse (self):
        dErr = {}
        lPointer = []
        for dToken in self.lToken:
            for i, dPointer in enumerate(lPointer):
                bValid = False
                for dNode in self._getNextMatchingNodes(dToken, dPointer["dNode"]):
                    dPointer["nOffset"] = dToken["i"]
                    dPointer["dNode"] = dNode
                    bValid = True
                if not bValid:
                    del lPointer[i]
            for dNode in self._getNextMatchingNodes(dToken, dGraph):
                lPointer.append({"nOffset": 0, "dNode": dNode})
            for dPointer in lPointer:
................................................................................
    def _executeActions (self, dNode):
        for sLineId, nextNodeKey in dNode.items():
            for sArc in dGraph[nextNodeKey]:
                bCondMemo = None
                sFuncCond, cActionType, sWhat, *eAct = dRule[sArc]
                # action in lActions: [ condition, action type, replacement/suggestion/action[, iGroupStart, iGroupEnd[, message, URL]] ]
                try:
                    bCondMemo = not sFuncCond or globals()[sFuncCond](self, dDA, sCountry, bCondMemo)
                    if bCondMemo:
                        if cActionType == "-":
                            # grammar error
                            nErrorStart = nSentenceOffset + m.start(eAct[0])
                            nErrorEnd = nSentenceOffset + m.start(eAct[1])
                            if nErrorStart not in dErrs or nPriority > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = _createError(self, sWhat, nErrorStart, nErrorEnd, sLineId, bUppercase, eAct[2], eAct[3], bIdRule, sOption, bContext)
                                dPriority[nErrorStart] = nPriority
                        elif cActionType == "~":
                            # text processor
                            self.lToken = _rewrite(self, sWhat, nErrorStart, nErrorEnd, bUppercase)
                            bChange = True
                        elif cActionType == "@":
                            # text processor
                            self.lToken = _rewrite(self, sWhat, nErrorStart, nErrorEnd, bUppercase)
                            bChange = True
                        elif cActionType == "=":
                            # disambiguation
                            globals()[sWhat](self, dDA)
                        elif cActionType == ">":
                            # we do nothing, this test is just a condition to apply all following actions
                            pass
                        else:
                            echo("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
................................................................................
    def _createWriterError (self):
        d = {}
        return d

    def _createDictError (self):
        d = {}
        return d


#### Common functions

def option ():
    pass


#### Analyse tokens

def morph ():
    pass

def morphex ():
    pass

def analyse ():
    pass

def analysex ():
    pass


#### Go outside scope

def nextToken ():
    pass

def prevToken ():
    pass

def look ():
    pass

def lookAndCheck ():
    pass


#### Disambiguator

def select ():
    pass

def exclude ():
    pass

def define ():
    pass