Grammalecte  Check-in [d99a910d73]

Overview
Comment:[core] ibdawg: draw path taken by word in dawg
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: d99a910d73b7e8555099926b4d0e55a619f3fe726f3c03478ec2ad180eebd4d0
User & Date: olr on 2017-06-28 19:40:53
Other Links: manifest | tags
Context
2017-06-29
04:16
[core] ibdawg: add next node when drawing path of a word check-in: 94f5da9f4a user: olr tags: core, trunk
2017-06-28
19:40
[core] ibdawg: draw path taken by word in dawg check-in: d99a910d73 user: olr tags: core, trunk
06:16
[fr] nr: confusion <la/l’a> check-in: f47e4bd4a5 user: olr tags: fr, trunk
Changes

Modified cli.py from [8ec24a7015] to [2ae9df701e].

204
205
206
207
208
209
210


211
212
213
214
215
216
217
                        echo("* " + sWord)
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("!"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo(" | ".join(oDict.suggest(sWord)))


            elif sText.startswith("/+ "):
                gce.setOptions({ opt:True  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({ opt:False  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/-- "):







>
>







204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
                        echo("* " + sWord)
                        for sMorph in oDict.getMorph(sWord):
                            echo("  {:<32} {}".format(sMorph, oLexGraphe.formatTags(sMorph)))
            elif sText.startswith("!"):
                for sWord in sText[1:].strip().split():
                    if sWord:
                        echo(" | ".join(oDict.suggest(sWord)))
            elif sText.startswith(">"):
                oDict.drawPath(sText[1:].strip())
            elif sText.startswith("/+ "):
                gce.setOptions({ opt:True  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/- "):
                gce.setOptions({ opt:False  for opt in sText[3:].strip().split()  if opt in gce.getOptions() })
                echo("done")
            elif sText.startswith("/-- "):

Modified gc_core/py/ibdawg.py from [303901f49a] to [b249dfb9bf].

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
...
170
171
172
173
174
175
176









177
178
179
180
181
182
183
...
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266









267
268
269
270
271
272
273
        if self.cStemming == "S":
            self.funcStemming = st.changeWordWithSuffixCode
        elif self.cStemming == "A":
            self.funcStemming = st.changeWordWithAffixCode
        else:
            self.funcStemming = st.noStemming
        self.nTag = self.nArcVal - self.nChar - self.nAff
        # <dChar> to get the value of an arc, <dArcVal> to get the char of an arc with its value
        self.dChar = {}
        for i in range(1, self.nChar):
            self.dChar[self.lArcVal[i]] = i
        self.dArcVal = { v: k  for k, v in self.dChar.items() }
            
        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2

        self.nBytesOffset = 1 # version 3
................................................................................
        for c in sWord:
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr == None:
                return False
        return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)










    def suggest (self, sWord):
        "returns a set of similar words"
        # first, we check for similar words
        #return set(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
        lSugg = self._suggest(sWord)
        if not lSugg:
................................................................................
            show(nDeep, cChar)
            lSugg.extend(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
        return lSugg

    def _getSimilarArcsAndCrushedChars (self, cChar, iAddr):
        "generator: yield similar char of <cChar> and address of the following node"
        for nVal, jAddr in self._getArcs(iAddr):
            if self.dArcVal.get(nVal, "") in cp.aUselessChar:
                yield (self.dArcVal[nVal], jAddr)
        for c in cp.d1to1.get(cChar, [cChar]):
            if c in self.dChar:
                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
                if jAddr:
                    yield (c, jAddr)

    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
        l = self.morph(sWord)
        if sWord[0:1].isupper():
            l.extend(self.morph(sWord.lower()))
            if sWord.isupper() and len(sWord) > 1:
                l.extend(self.morph(sWord.capitalize()))
        return l










    # def morph (self, sWord):
    #     is defined in __init__

    # VERSION 1
    def _morph1 (self, sWord):
        "returns morphologies of sWord"







|



|







 







>
>
>
>
>
>
>
>
>







 







|
|






|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
...
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
...
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
        if self.cStemming == "S":
            self.funcStemming = st.changeWordWithSuffixCode
        elif self.cStemming == "A":
            self.funcStemming = st.changeWordWithAffixCode
        else:
            self.funcStemming = st.noStemming
        self.nTag = self.nArcVal - self.nChar - self.nAff
        # <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        self.dChar = {}
        for i in range(1, self.nChar):
            self.dChar[self.lArcVal[i]] = i
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
            
        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2

        self.nBytesOffset = 1 # version 3
................................................................................
        for c in sWord:
            if c not in self.dChar:
                return False
            iAddr = self._lookupArcNode(self.dChar[c], iAddr)
            if iAddr == None:
                return False
        return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)

    def getMorph (self, sWord):
        "retrieves morphologies list, different casing allowed"
        l = self.morph(sWord)
        if sWord[0:1].isupper():
            l.extend(self.morph(sWord.lower()))
            if sWord.isupper() and len(sWord) > 1:
                l.extend(self.morph(sWord.capitalize()))
        return l

    def suggest (self, sWord):
        "returns a set of similar words"
        # first, we check for similar words
        #return set(self._suggestWithCrushedUselessChars(cp.clearWord(sWord)))
        lSugg = self._suggest(sWord)
        if not lSugg:
................................................................................
            show(nDeep, cChar)
            lSugg.extend(self._suggestWithCrushedUselessChars(sWord[1:], nDeep+1, jAddr, sNewWord+cChar))
        return lSugg

    def _getSimilarArcsAndCrushedChars (self, cChar, iAddr):
        "generator: yield similar char of <cChar> and address of the following node"
        for nVal, jAddr in self._getArcs(iAddr):
            if self.dCharVal.get(nVal, None) in cp.aUselessChar:
                yield (self.dCharVal[nVal], jAddr)
        for c in cp.d1to1.get(cChar, [cChar]):
            if c in self.dChar:
                jAddr = self._lookupArcNode(self.dChar[c], iAddr)
                if jAddr:
                    yield (c, jAddr)

    def drawPath (self, sWord, iAddr=0):
        if not sWord:
            return
        iPos = -1
        n = 0
        print(sWord[0:1] + ": ", end="")
        for nVal, jAddr in self._getArcs(iAddr):
            if nVal in self.dCharVal:
                print(self.dCharVal[nVal], end="")
                if self.dCharVal[nVal] == sWord[0:1]:
                    iNextNodeAddr = jAddr
                    iPos = n
                n += 1
        if iPos >= 0:
            print("\n   "+ " " * iPos + "|")
            self.drawPath(sWord[1:], iNextNodeAddr)


    # def morph (self, sWord):
    #     is defined in __init__

    # VERSION 1
    def _morph1 (self, sWord):
        "returns morphologies of sWord"