Grammalecte  Check-in [efd7bb0171]

Overview
Comment:[graphspell][py] ibdawg: init directly from an object (JSON)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | graphspell | multid
Files: files | file ages | folders
SHA3-256: efd7bb0171c2350d26fe3371251ea666afae0705b63fd7eab6dcc96510694490
User & Date: olr on 2018-02-28 10:34:28
Other Links: branch diff | manifest | tags
Context
2018-02-28
15:28
[graphspell][py] dawg: get JSON object instead of JSON string check-in: 1b8133065d user: olr tags: graphspell, multid
10:34
[graphspell][py] ibdawg: init directly from an object (JSON) check-in: efd7bb0171 user: olr tags: graphspell, multid
07:50
merge trunk check-in: a973e9aad8 user: olr tags: multid
Changes

Modified graphspell/ibdawg.py from [c41b426a86] to [145a26a0d0].

75
76
77
78
79
80
81
82

83
84
85
86
87
88
89
90

91
92


93
94
95
96
97
98
99
100
101
...
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
        self.aSugg.clear()
        self.dSugg.clear()


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, sfDict):

        self.by = pkgutil.get_data(__package__, "_dictionaries/" + sfDict)
        if not self.by:
            raise OSError("# Error. File not found or not loadable: "+sfDict)

        if sfDict.endswith(".bdic"):
            self._initBinary()
        elif sfDict.endswith(".json"):
            self._initJSON()

        else:
            raise OSError("# Error. Unknown file type: "+sfDict)



        self.sFileName = sfDict

        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2

        # function to decode the affix/suffix code
................................................................................
        # <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        self.dChar = {}
        for i in range(1, self.nChar+1):
            self.dChar[self.lArcVal[i]] = i
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.nBytesOffset = 1 # version 3

    def _initJSON (self):
        "initialize with a JSON text file"
        self.__dict__.update(json.loads(self.by.decode("utf-8")))
        #self.__dict__.update(json.loads(self.by))                  # In Python 3.6, can read directly binary strings
        self.byDic = binascii.unhexlify(self.sByDic)

    def getInfo (self):
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \







|
>
|
|
|

|
|
|
<
>
|
|
>
>

|







 







|

|
<







75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90

91
92
93
94
95
96
97
98
99
100
101
102
103
104
...
168
169
170
171
172
173
174
175
176
177

178
179
180
181
182
183
184
        self.aSugg.clear()
        self.dSugg.clear()


class IBDAWG:
    """INDEXABLE BINARY DIRECT ACYCLIC WORD GRAPH"""

    def __init__ (self, source):
        if type(source) is str:
            self.by = pkgutil.get_data(__package__, "_dictionaries/" + source)
            if not self.by:
                raise OSError("# Error. File not found or not loadable: "+source)

            if source.endswith(".bdic"):
                self._initBinary()
            elif source.endswith(".json"):

                self._initJSON(json.loads(self.by.decode("utf-8")))     #json.loads(self.by)    # In Python 3.6, can read directly binary strings
            else:
                raise OSError("# Error. Unknown file type: "+source)
        else:
            self._initJSON(source)

        self.sFileName = source  if type(source) is str  else "[None]"

        self._arcMask = (2 ** ((self.nBytesArc * 8) - 3)) - 1
        self._finalNodeMask = 1 << ((self.nBytesArc * 8) - 1)
        self._lastArcMask = 1 << ((self.nBytesArc * 8) - 2)
        self._addrBitMask = 1 << ((self.nBytesArc * 8) - 3)  # version 2

        # function to decode the affix/suffix code
................................................................................
        # <dChar> to get the value of an arc, <dCharVal> to get the char of an arc with its value
        self.dChar = {}
        for i in range(1, self.nChar+1):
            self.dChar[self.lArcVal[i]] = i
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.nBytesOffset = 1 # version 3

    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.__dict__.update(oJSON)

        self.byDic = binascii.unhexlify(self.sByDic)

    def getInfo (self):
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \